diff -Nru mesa-18.1.3/configure mesa-18.1.5/configure --- mesa-18.1.3/configure 2018-06-29 17:47:37.000000000 +0000 +++ mesa-18.1.5/configure 2018-07-27 13:52:25.000000000 +0000 @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for Mesa 18.1.3. +# Generated by GNU Autoconf 2.69 for Mesa 18.1.5. # # Report bugs to . # @@ -591,8 +591,8 @@ # Identity of this package. PACKAGE_NAME='Mesa' PACKAGE_TARNAME='mesa' -PACKAGE_VERSION='18.1.3' -PACKAGE_STRING='Mesa 18.1.3' +PACKAGE_VERSION='18.1.5' +PACKAGE_STRING='Mesa 18.1.5' PACKAGE_BUGREPORT='https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa' PACKAGE_URL='' @@ -1866,7 +1866,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures Mesa 18.1.3 to adapt to many kinds of systems. +\`configure' configures Mesa 18.1.5 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1937,7 +1937,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of Mesa 18.1.3:";; + short | recursive ) echo "Configuration of Mesa 18.1.5:";; esac cat <<\_ACEOF @@ -2309,7 +2309,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -Mesa configure 18.1.3 +Mesa configure 18.1.5 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -3028,7 +3028,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by Mesa $as_me 18.1.3, which was +It was created by Mesa $as_me 18.1.5, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -4003,7 +4003,7 @@ # Define the identity of the package. 
PACKAGE='mesa' - VERSION='18.1.3' + VERSION='18.1.5' cat >>confdefs.h <<_ACEOF @@ -21862,6 +21862,12 @@ done +ac_fn_c_check_header_mongrel "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default" +if test "x$ac_cv_header_dlfcn_h" = xyes; then : + DEFINES="$DEFINES -DHAVE_DLFCN_H" +fi + + ac_fn_c_check_func "$LINENO" "strtof" "ac_cv_func_strtof" if test "x$ac_cv_func_strtof" = xyes; then : DEFINES="$DEFINES -DHAVE_STRTOF" @@ -30579,7 +30585,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by Mesa $as_me 18.1.3, which was +This file was extended by Mesa $as_me 18.1.5, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -30636,7 +30642,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -Mesa config.status 18.1.3 +Mesa config.status 18.1.5 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff -Nru mesa-18.1.3/configure.ac mesa-18.1.5/configure.ac --- mesa-18.1.3/configure.ac 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/configure.ac 2018-07-27 13:52:17.000000000 +0000 @@ -880,6 +880,7 @@ AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"]) AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"]) AC_CHECK_HEADERS([endian.h]) +AC_CHECK_HEADER([dlfcn.h], [DEFINES="$DEFINES -DHAVE_DLFCN_H"]) AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"]) AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"]) AC_CHECK_FUNC([timespec_get], [DEFINES="$DEFINES -DHAVE_TIMESPEC_GET"]) diff -Nru mesa-18.1.3/debian/changelog mesa-18.1.5/debian/changelog --- mesa-18.1.3/debian/changelog 2018-07-30 14:01:24.000000000 +0000 +++ mesa-18.1.5/debian/changelog 2018-07-30 14:01:26.000000000 +0000 @@ -1,3 +1,25 @@ +mesa (18.1.5-1ubuntu1) cosmic; urgency=medium + + 
* Merge from Debian. + + -- Timo Aaltonen Mon, 30 Jul 2018 16:59:04 +0300 + +mesa (18.1.5-1) unstable; urgency=medium + + * New upstream release. + + -- Timo Aaltonen Mon, 30 Jul 2018 14:30:06 +0300 + +mesa (18.1.4-1) unstable; urgency=medium + + [ Emilio Pozuelo Monfort ] + * New upstream release. + + [ Simon McVittie ] + * Make libwayland-dev Build-Depends consistent with -dev Depends + + -- Emilio Pozuelo Monfort Sun, 15 Jul 2018 12:59:44 +0200 + mesa (18.1.3-1ubuntu1) cosmic; urgency=medium * Merge from Debian. diff -Nru mesa-18.1.3/debian/control mesa-18.1.5/debian/control --- mesa-18.1.3/debian/control 2018-07-30 14:01:24.000000000 +0000 +++ mesa-18.1.5/debian/control 2018-07-30 14:01:26.000000000 +0000 @@ -43,7 +43,7 @@ bison, llvm-6.0-dev (>= 1:6.0~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], libelf-dev [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], - libwayland-dev (>= 1.11.0) [linux-any], + libwayland-dev (>= 1.15.0) [linux-any], libclang-6.0-dev (>= 1:6.0~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], libclc-dev (>= 0.2.0+git20180312-1~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], wayland-protocols (>= 1.9), diff -Nru mesa-18.1.3/docs/relnotes/18.1.3.html mesa-18.1.5/docs/relnotes/18.1.3.html --- mesa-18.1.3/docs/relnotes/18.1.3.html 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/docs/relnotes/18.1.3.html 2018-07-27 13:52:17.000000000 +0000 @@ -31,8 +31,8 @@

SHA256 checksums

-TBD  mesa-18.1.3.tar.gz
-TBD  mesa-18.1.3.tar.xz
+2a1e36280d01ad18ba6d5b3fbd653ceaa109eaa031b78eb5dfaa4df452742b66  mesa-18.1.3.tar.gz
+54f08deeda0cd2f818e8d40140040ed013de7852573002453b7f50da9ea738ce  mesa-18.1.3.tar.xz
 
diff -Nru mesa-18.1.3/docs/relnotes/18.1.4.html mesa-18.1.5/docs/relnotes/18.1.4.html --- mesa-18.1.3/docs/relnotes/18.1.4.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-18.1.5/docs/relnotes/18.1.4.html 2018-07-27 13:52:17.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + Mesa Release Notes + + + + +
+

The Mesa 3D Graphics Library

+
+ + +
+ +

Mesa 18.1.4 Release Notes / July 13 2018

+ +

+Mesa 18.1.4 is a bug fix release which fixes bugs found since the 18.1.3 release. +

+

+Mesa 18.1.4 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

+ + +

SHA256 checksums

+
+SHA256: 8acd42e4ac4d1e96ed22344073b3d4fef03d10f225f4eaf3f88c001dfc10e2db  mesa-18.1.4.tar.gz
+SHA256: 3061488b5d85504092cf4343816cfb2d96f2ad9bc2edec31fc96933d184cf58b  mesa-18.1.4.tar.xz
+
+ + +

New features

+ +

None

+ +

Bug fixes

+
    + +
  • Bug 106906 - Failed to recongnize keyword “sampler2DRect” and "sampler2DRectShadow"
  • + +
  • Bug 106928 - When starting a match Rocket League crashes on "Go"
  • + +
  • Bug 107193 - piglit.spec.arb_compute_shader.linker.bug-93840 fails
  • + +
+ +

Changes

+

Adam Jackson (1):

+
    +
  • glx: Don't allow glXMakeContextCurrent() with only one valid drawable
  • +
+ +

Dave Airlie (1):

+
    +
  • r600/sb: cleanup if_conversion iterator to be legal C++
  • +
+ +

Dylan Baker (2):

+
    +
  • docs: Add SHA256 sums to notes for 18.1.3
  • +
  • Bump version for release
  • +
+ +

Iago Toral Quiroga (3):

+
    +
  • anv/cmd_buffer: make descriptors dirty when emitting base state address
  • +
  • anv/cmd_buffer: clean dirty push constants flag after emitting push constants
  • +
  • anv/cmd_buffer: never shrink the push constant buffer size
  • +
+ +

Ian Romanick (4):

+
    +
  • i965/vec4: Don't cmod propagate from CMP to ADD if the writemask isn't compatible
  • +
  • intel/compiler: Relax mixed type restriction for saturating immediates
  • +
  • i965/vec4: Properly handle sign(-abs(x))
  • +
  • i965/fs: Properly handle sign(-abs(x))
  • +
+ +

Jason Ekstrand (3):

+
    +
  • intel/fs: Split instructions low to high in lower_simd_width
  • +
  • anv: Be more careful about hashing pipeline layouts
  • +
  • intel/fs: Mark LINTERP opcode as writing accumulator on platforms without PLN
  • +
+ +

Jose Maria Casanova Crespo (3):

+
    +
  • i965/fs: Register allocator shoudn't use grf127 for sends dest
  • +
  • intel/compiler: grf127 can not be dest when src and dest overlap in send
  • +
  • i965/fs: unspills shoudn't use grf127 as dest since Gen8+
  • +
+ +

Lionel Landwerlin (1):

+
    +
  • i965: fix clear color bo address relocation
  • +
+ +

Marek Olšák (3):

+
    +
  • radeonsi: fix memory exhaustion issue with DCC statistics gathering with DRI2
  • +
  • glsl/cache: save and restore ExternalSamplersUsed
  • +
  • st/dri: fix a crash in server_wait_sync
  • +
+ +

Neil Roberts (1):

+
    +
  • i965: Fix output register sizes when variable ranges are interleaved
  • +
+ +

Rhys Perry (1):

+
    +
  • nvc0/ir: fix TargetNVC0::insnCanLoadOffset()
  • +
+ +

Roland Scheidegger (1):

+
    +
  • r600/sb: fix crash in fold_alu_op3
  • +
+ +

Ross Burton (1):

+
    +
  • egl: fix build race in automake
  • +
+ +

Samuel Pitoiset (1):

+
    +
  • radv: fix emitting the view index on GFX9
  • +
+ +

Timothy Arceri (2):

+
    +
  • glsl: skip comparison opt when adding vars of different size
  • +
  • nir: fix selection of loop terminator when two or more have the same limit
  • +
+ +

zhaowei yuan (1):

+
    +
  • glsl: Treat sampler2DRect and sampler2DRectShadow as reserved in ES2
  • +
+ +
+ + diff -Nru mesa-18.1.3/docs/relnotes/18.1.5.html mesa-18.1.5/docs/relnotes/18.1.5.html --- mesa-18.1.3/docs/relnotes/18.1.5.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-18.1.5/docs/relnotes/18.1.5.html 2018-07-27 13:52:17.000000000 +0000 @@ -0,0 +1,183 @@ + + + + + Mesa Release Notes + + + + +
+

The Mesa 3D Graphics Library

+
+ + +
+ +

Mesa 18.1.5 Release Notes / July 27 2018

+ +

+Mesa 18.1.5 is a bug fix release which fixes bugs found since the 18.1.4 release. +

+

+Mesa 18.1.5 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

+ + +

SHA256 checksums

+
+TBD
+TBD
+
+ + +

New features

+ +

None

+ +

Bug fixes

+
    + +
  • Bug 103274 - BRW allocates too much heap memory
  • + +
  • Bug 107275 - NIR segfaults after spirv-opt
  • + +
  • Bug 107295 - Access violation on glDrawArrays with count >= 2048
  • + +
  • Bug 107312 - Mesa-git RPM build fails after commit 8cacf38f527d42e41441ef8c25d95d4b2f4e8602
  • + +
  • Bug 107366 - NIR verification crashes on piglit tests
  • + +
+ +

Changes

+

Alex Smith (1):

+
    +
  • anv: Pay attention to VK_ACCESS_MEMORY_(READ|WRITE)_BIT
  • +
+ +

Bas Nieuwenhuizen (7):

+
    +
  • radv: Select correct entries for binning.
  • +
  • radv: Fix number of samples used for binning.
  • +
  • radv: Disable disabled color buffers in rbplus opts.
  • +
  • nir: Do not use continue block after removing it.
  • +
  • util/disk_cache: Fix disk_cache_get_function_timestamp with disabled cache.
  • +
  • nir: Fix end of function without return warning/error.
  • +
  • radv: Still enable inmemory & API level caching if disk cache is not enabled.
  • +
+ +

Chad Versace (2):

+
    +
  • anv/android: Fix type error in call to vk_errorf()
  • +
  • anv/android: Fix Autotools build for VK_ANDROID_native_buffer
  • +
+ +

Chih-Wei Huang (1):

+
    +
  • Android: fix a missing nir_intrinsics.h error
  • +
+ +

Danylo Piliaiev (1):

+
    +
  • i965: Sweep NIR after linking phase to free held memory
  • +
+ +

Dave Airlie (1):

+
    +
  • r600: enable tess_input_info for TES
  • +
+ +

Dylan Baker (5):

+
    +
  • docs: Add sha256 sums for 18.1.4 tarballs
  • +
  • cherry-ignore: add 4a67ce886a7b3def5f66c1aedf9e5436d157a03c
  • +
  • cherry-ignore: Add 1f616a840eac02241c585d28e9dac8f19a297f39
  • +
  • cherry-ignore: add 11712b9ca17e4e1a819dcb7d020e19c6da77bc90
  • +
  • bump version to 18.1.5
  • +
+ +

Eric Anholt (2):

+
    +
  • vc4: Don't automatically reallocate a PERSISTENT-mapped buffer.
  • +
  • meson: Move xvmc test tools from unit tests to installed tools.
  • +
+ +

Harish Krupo (1):

+
    +
  • egl: Fix missing clamping in eglSetDamageRegionKHR
  • +
+ +

Jan Vesely (3):

+
    +
  • radeonsi: Refuse to accept code with unhandled relocations
  • +
  • clover: Report error when pipe driver fails to create compute state
  • +
  • clover: Catch errors from executing event action
  • +
+ +

Jason Ekstrand (6):

+
    +
  • anv: Stop setting 3DSTATE_PS_EXTRA::PixelShaderHasUAV
  • +
  • nir/serialize: Alloc constants off the variable
  • +
  • blorp: Handle the RGB workaround more like other workarounds
  • +
  • intel/blorp: Handle 3-component formats in clears
  • +
  • intel/compiler: Account for built-in uniforms in analyze_ubo_ranges
  • +
  • spirv: Fix a couple of image atomic load/store bugs
  • +
+ +

José Fonseca (1):

+
    +
  • gallium/tests: Don't ignore S3TC errors.
  • +
+ +

Karol Herbst (1):

+
    +
  • nir: fix printing of vec16 type
  • +
+ +

Lepton Wu (1):

+
    +
  • virgl: Fix flush in virgl_encoder_inline_write.
  • +
+ +

Lucas Stach (1):

+
    +
  • st/mesa: call resource_changed when binding a EGLImage to a texture
  • +
+ +

Mauro Rossi (2):

+
    +
  • radv: winsys/amdgpu: include missing pthread.h header
  • +
  • android: util/disk_cache: fix building errors in gallium drivers
  • +
+ +

Michel Dänzer (1):

+
    +
  • gallium: Check pipe_screen::resource_changed before dereferencing it
  • +
+ +

Roland Scheidegger (1):

+
    +
  • draw: force draw pipeline if there's more than 65535 vertices
  • +
+ +

Samuel Iglesias Gonsálvez (1):

+
    +
  • anv: fix assert in anv_CmdBindDescriptorSets()
  • +
+ +

Samuel Pitoiset (3):

+
    +
  • radv: make sure to wait for CP DMA when needed
  • +
  • radv: emit a dummy ZPASS_DONE to prevent GPU hangs on GFX9
  • +
  • radv: fix a memleak for merged shaders on GFX9
  • +
+ +
+ + diff -Nru mesa-18.1.3/meson.build mesa-18.1.5/meson.build --- mesa-18.1.3/meson.build 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/meson.build 2018-07-27 13:52:17.000000000 +0000 @@ -54,7 +54,7 @@ with_swr_arches = get_option('swr-arches').split(',') with_tools = get_option('tools').split(',') if with_tools.contains('all') - with_tools = ['freedreno', 'glsl', 'intel', 'nir', 'nouveau'] + with_tools = ['freedreno', 'glsl', 'intel', 'nir', 'nouveau', 'xvmc'] endif if get_option('texture-float') pre_args += '-DTEXTURE_FLOAT_ENABLED' @@ -928,7 +928,7 @@ pre_args += '-DMAJOR_IN_MKDEV' endif -foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h'] +foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h', 'dlfcn.h'] if cc.compiles('#include <@0@>'.format(h), name : '@0@'.format(h)) pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify()) endif diff -Nru mesa-18.1.3/meson_options.txt mesa-18.1.5/meson_options.txt --- mesa-18.1.3/meson_options.txt 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/meson_options.txt 2018-07-27 13:52:17.000000000 +0000 @@ -284,5 +284,5 @@ 'tools', type : 'string', value : '', - description : 'Comma delimited list of tools to build. choices : freedreno,glsl,intel,nir,nouveau or all' + description : 'Comma delimited list of tools to build. 
choices : freedreno,glsl,intel,nir,nouveau,xvmc or all' ) diff -Nru mesa-18.1.3/src/amd/vulkan/radv_cmd_buffer.c mesa-18.1.5/src/amd/vulkan/radv_cmd_buffer.c --- mesa-18.1.3/src/amd/vulkan/radv_cmd_buffer.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/amd/vulkan/radv_cmd_buffer.c 2018-07-27 13:52:17.000000000 +0000 @@ -319,11 +319,21 @@ } if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { + unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends; + unsigned eop_bug_offset; void *fence_ptr; + radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0, &cmd_buffer->gfx9_fence_offset, &fence_ptr); cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo; + + /* Allocate a buffer for the EOP bug on GFX9. */ + radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0, + &eop_bug_offset, &fence_ptr); + cmd_buffer->gfx9_eop_bug_va = + radv_buffer_get_va(cmd_buffer->upload.upload_bo); + cmd_buffer->gfx9_eop_bug_va += eop_bug_offset; } cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL; @@ -473,7 +483,7 @@ cmd_buffer->device->physical_device->rad_info.chip_class, ptr, va, radv_cmd_buffer_uses_mec(cmd_buffer), - flags); + flags, cmd_buffer->gfx9_eop_bug_va); } if (unlikely(cmd_buffer->device->trace_bo)) @@ -681,8 +691,11 @@ unsigned sx_blend_opt_control = 0; for (unsigned i = 0; i < subpass->color_count; ++i) { - if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) + if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) { + sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); + sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); continue; + } int idx = subpass->color_attachments[i].attachment; struct radv_color_buffer_info *cb = &framebuffer->attachments[idx].cb; @@ -796,6 +809,10 @@ } } + for (unsigned i = subpass->color_count; i < 8; ++i) { + sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); + sx_blend_opt_control |= 
S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); + } radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); radeon_emit(cmd_buffer->cs, sx_ps_downconvert); radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon); @@ -2500,6 +2517,11 @@ si_emit_cache_flush(cmd_buffer); } + /* Make sure CP DMA is idle at the end of IBs because the kernel + * doesn't wait for it. + */ + si_cp_dma_wait_for_idle(cmd_buffer); + vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs)) @@ -3003,8 +3025,9 @@ { struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) { - if (!pipeline->shaders[stage]) + if (!radv_get_shader(pipeline, stage)) continue; + struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_VIEW_INDEX); if (loc->sgpr_idx == -1) continue; @@ -4053,6 +4076,11 @@ 0); } + /* Make sure CP DMA is idle because the driver might have performed a + * DMA operation for copying or filling buffers/images. + */ + si_cp_dma_wait_for_idle(cmd_buffer); + cmd_buffer->state.flush_bits |= dst_flush_bits; } @@ -4069,6 +4097,11 @@ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18); + /* Make sure CP DMA is idle because the driver might have performed a + * DMA operation for copying or filling buffers/images. + */ + si_cp_dma_wait_for_idle(cmd_buffer); + /* TODO: this is overkill. Probably should figure something out from * the stage mask. 
*/ @@ -4077,7 +4110,8 @@ cmd_buffer->device->physical_device->rad_info.chip_class, radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, - 1, va, 2, value); + 1, va, 2, value, + cmd_buffer->gfx9_eop_bug_va); assert(cmd_buffer->cs->cdw <= cdw_max); } diff -Nru mesa-18.1.3/src/amd/vulkan/radv_device.c mesa-18.1.5/src/amd/vulkan/radv_device.c --- mesa-18.1.3/src/amd/vulkan/radv_device.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/amd/vulkan/radv_device.c 2018-07-27 13:52:17.000000000 +0000 @@ -2181,7 +2181,7 @@ RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SMEM_L1 | RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2); + RADV_CMD_FLAG_INV_GLOBAL_L2, 0); } else if (i == 1) { si_cs_emit_cache_flush(cs, queue->device->physical_device->rad_info.chip_class, @@ -2191,7 +2191,7 @@ RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SMEM_L1 | RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2); + RADV_CMD_FLAG_INV_GLOBAL_L2, 0); } if (!queue->device->ws->cs_finalize(cs)) diff -Nru mesa-18.1.3/src/amd/vulkan/radv_pipeline.c mesa-18.1.5/src/amd/vulkan/radv_pipeline.c --- mesa-18.1.3/src/amd/vulkan/radv_pipeline.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/amd/vulkan/radv_pipeline.c 2018-07-27 13:52:17.000000000 +0000 @@ -2154,7 +2154,7 @@ for (int i = 0; i < MESA_SHADER_STAGES; ++i) { free(codes[i]); - if (modules[i]) { + if (nir[i]) { if (!pipeline->device->keep_shader_info) ralloc_free(nir[i]); @@ -2437,7 +2437,7 @@ pipeline->device->physical_device->rad_info.max_se); unsigned log_num_se = util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_se); - unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_mode_cntl_1); + unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config); unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->graphics.ms.db_eqaa); unsigned effective_samples = total_samples; unsigned color_bytes_per_pixel = 0; @@ 
-2462,7 +2462,7 @@ } const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se]; - while(color_entry->bpp <= color_bytes_per_pixel) + while(color_entry[1].bpp <= color_bytes_per_pixel) ++color_entry; extent = color_entry->extent; @@ -2476,7 +2476,7 @@ unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples; const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se]; - while(ds_entry->bpp <= ds_bytes_per_pixel) + while(ds_entry[1].bpp <= ds_bytes_per_pixel) ++ds_entry; extent.width = MIN2(extent.width, ds_entry->extent.width); diff -Nru mesa-18.1.3/src/amd/vulkan/radv_pipeline_cache.c mesa-18.1.5/src/amd/vulkan/radv_pipeline_cache.c --- mesa-18.1.3/src/amd/vulkan/radv_pipeline_cache.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/amd/vulkan/radv_pipeline_cache.c 2018-07-27 13:52:17.000000000 +0000 @@ -248,7 +248,6 @@ * MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested. */ return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) || - !device->physical_device->disk_cache || device->keep_shader_info; } @@ -271,7 +270,7 @@ /* Don't cache when we want debug info, since this isn't * present in the cache. */ - if (radv_is_cache_disabled(device)) { + if (radv_is_cache_disabled(device) || !device->physical_device->disk_cache) { pthread_mutex_unlock(&cache->mutex); return false; } diff -Nru mesa-18.1.3/src/amd/vulkan/radv_private.h mesa-18.1.5/src/amd/vulkan/radv_private.h --- mesa-18.1.3/src/amd/vulkan/radv_private.h 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/amd/vulkan/radv_private.h 2018-07-27 13:52:17.000000000 +0000 @@ -972,6 +972,9 @@ uint32_t last_num_instances; uint32_t last_first_instance; uint32_t last_vertex_offset; + + /* Whether CP DMA is busy/idle. 
*/ + bool dma_is_busy; }; struct radv_cmd_pool { @@ -1034,6 +1037,7 @@ uint32_t gfx9_fence_offset; struct radeon_winsys_bo *gfx9_fence_bo; uint32_t gfx9_fence_idx; + uint64_t gfx9_eop_bug_va; /** * Whether a query pool has been resetted and we have to flush caches. @@ -1066,7 +1070,8 @@ unsigned data_sel, uint64_t va, uint32_t old_fence, - uint32_t new_fence); + uint32_t new_fence, + uint64_t gfx9_eop_bug_va); void si_emit_wait_fence(struct radeon_winsys_cs *cs, bool predicated, @@ -1076,7 +1081,8 @@ enum chip_class chip_class, uint32_t *fence_ptr, uint64_t va, bool is_mec, - enum radv_cmd_flush_bits flush_bits); + enum radv_cmd_flush_bits flush_bits, + uint64_t gfx9_eop_bug_va); void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer); void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va); void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, @@ -1086,6 +1092,8 @@ unsigned size); void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value); +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer); + void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer); bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, diff -Nru mesa-18.1.3/src/amd/vulkan/radv_query.c mesa-18.1.5/src/amd/vulkan/radv_query.c --- mesa-18.1.3/src/amd/vulkan/radv_query.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/amd/vulkan/radv_query.c 2018-07-27 13:52:17.000000000 +0000 @@ -1169,7 +1169,8 @@ cmd_buffer->device->physical_device->rad_info.chip_class, radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, - 1, avail_va, 0, 1); + 1, avail_va, 0, 1, + cmd_buffer->gfx9_eop_bug_va); break; default: unreachable("ending unhandled query type"); @@ -1292,13 +1293,15 @@ cmd_buffer->device->physical_device->rad_info.chip_class, mec, V_028A90_BOTTOM_OF_PIPE_TS, 0, - 3, query_va, 0, 0); + 3, query_va, 0, 0, + cmd_buffer->gfx9_eop_bug_va); 
si_cs_emit_write_event_eop(cs, false, cmd_buffer->device->physical_device->rad_info.chip_class, mec, V_028A90_BOTTOM_OF_PIPE_TS, 0, - 1, avail_va, 0, 1); + 1, avail_va, 0, 1, + cmd_buffer->gfx9_eop_bug_va); break; } query_va += pool->stride; diff -Nru mesa-18.1.3/src/amd/vulkan/si_cmd_buffer.c mesa-18.1.5/src/amd/vulkan/si_cmd_buffer.c --- mesa-18.1.3/src/amd/vulkan/si_cmd_buffer.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/amd/vulkan/si_cmd_buffer.c 2018-07-27 13:52:17.000000000 +0000 @@ -852,7 +852,8 @@ unsigned data_sel, uint64_t va, uint32_t old_fence, - uint32_t new_fence) + uint32_t new_fence, + uint64_t gfx9_eop_bug_va) { unsigned op = EVENT_TYPE(event) | EVENT_INDEX(5) | @@ -860,6 +861,17 @@ unsigned is_gfx8_mec = is_mec && chip_class < GFX9; if (chip_class >= GFX9 || is_gfx8_mec) { + /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion + * counters) must immediately precede every timestamp event to + * prevent a GPU hang on GFX9. + */ + if (chip_class == GFX9) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); + radeon_emit(cs, gfx9_eop_bug_va); + radeon_emit(cs, gfx9_eop_bug_va >> 32); + } + radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 
5 : 6, predicated)); radeon_emit(cs, op); radeon_emit(cs, EOP_DATA_SEL(data_sel)); @@ -941,7 +953,8 @@ uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, - enum radv_cmd_flush_bits flush_bits) + enum radv_cmd_flush_bits flush_bits, + uint64_t gfx9_eop_bug_va) { unsigned cp_coher_cntl = 0; uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | @@ -971,7 +984,8 @@ chip_class, is_mec, V_028A90_FLUSH_AND_INV_CB_DATA_TS, - 0, 0, 0, 0, 0); + 0, 0, 0, 0, 0, + gfx9_eop_bug_va); } } if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) { @@ -1057,7 +1071,8 @@ uint32_t old_fence = (*flush_cnt)++; si_cs_emit_write_event_eop(cs, false, chip_class, false, cb_db_event, tc_flags, 1, - flush_va, old_fence, *flush_cnt); + flush_va, old_fence, *flush_cnt, + gfx9_eop_bug_va); si_emit_wait_fence(cs, false, flush_va, *flush_cnt, 0xffffffff); } @@ -1149,7 +1164,8 @@ cmd_buffer->device->physical_device->rad_info.chip_class, ptr, va, radv_cmd_buffer_uses_mec(cmd_buffer), - cmd_buffer->state.flush_bits); + cmd_buffer->state.flush_bits, + cmd_buffer->gfx9_eop_bug_va); if (unlikely(cmd_buffer->device->trace_bo)) @@ -1214,7 +1230,6 @@ struct radeon_winsys_cs *cs = cmd_buffer->cs; uint32_t header = 0, command = 0; - assert(size); assert(size <= cp_dma_max_byte_count(cmd_buffer)); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9); @@ -1273,9 +1288,14 @@ * indices. If we wanted to execute CP DMA in PFP, this packet * should precede it. */ - if ((flags & CP_DMA_SYNC) && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) { - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); - radeon_emit(cs, 0); + if (flags & CP_DMA_SYNC) { + if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) { + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); + radeon_emit(cs, 0); + } + + /* CP will see the sync flag and wait for all DMAs to complete. 
*/ + cmd_buffer->state.dma_is_busy = false; } if (unlikely(cmd_buffer->device->trace_bo)) @@ -1339,6 +1359,8 @@ uint64_t main_src_va, main_dest_va; uint64_t skipped_size = 0, realign_size = 0; + /* Assume that we are not going to sync after the last DMA operation. */ + cmd_buffer->state.dma_is_busy = true; if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO || cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) { @@ -1402,6 +1424,9 @@ assert(va % 4 == 0 && size % 4 == 0); + /* Assume that we are not going to sync after the last DMA operation. */ + cmd_buffer->state.dma_is_busy = true; + while (size) { unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer)); unsigned dma_flags = CP_DMA_CLEAR; @@ -1417,6 +1442,25 @@ } } +void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->device->physical_device->rad_info.chip_class < CIK) + return; + + if (!cmd_buffer->state.dma_is_busy) + return; + + /* Issue a dummy DMA that copies zero bytes. + * + * The DMA engine will see that there's no work to do and skip this + * DMA request, however, the CP will see the sync flag and still wait + * for all DMAs to complete. + */ + si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC); + + cmd_buffer->state.dma_is_busy = false; +} + /* For MSAA sample positions. 
*/ #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \ diff -Nru mesa-18.1.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h mesa-18.1.5/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h --- mesa-18.1.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h 2018-07-27 13:52:17.000000000 +0000 @@ -33,6 +33,7 @@ #include "addrlib/addrinterface.h" #include #include "util/list.h" +#include struct radv_amdgpu_winsys { struct radeon_winsys base; diff -Nru mesa-18.1.3/src/compiler/glsl/glsl_lexer.cpp mesa-18.1.5/src/compiler/glsl/glsl_lexer.cpp --- mesa-18.1.3/src/compiler/glsl/glsl_lexer.cpp 2018-06-29 17:48:18.000000000 +0000 +++ mesa-18.1.5/src/compiler/glsl/glsl_lexer.cpp 2018-07-27 13:53:08.000000000 +0000 @@ -3238,7 +3238,7 @@ case 238: YY_RULE_SETUP #line 631 "./glsl/glsl_lexer.ll" -DEPRECATED_ES_TYPE_WITH_ALT(yyextra->ARB_texture_rectangle_enable, glsl_type::sampler2DRect_type); +TYPE_WITH_ALT(110, 100, 0, 0, yyextra->ARB_texture_rectangle_enable, glsl_type::sampler2DRect_type); YY_BREAK case 239: YY_RULE_SETUP @@ -3248,7 +3248,7 @@ case 240: YY_RULE_SETUP #line 633 "./glsl/glsl_lexer.ll" -DEPRECATED_ES_TYPE_WITH_ALT(yyextra->ARB_texture_rectangle_enable, glsl_type::sampler2DRectShadow_type); +TYPE_WITH_ALT(110, 100, 0, 0, yyextra->ARB_texture_rectangle_enable, glsl_type::sampler2DRectShadow_type); YY_BREAK case 241: YY_RULE_SETUP diff -Nru mesa-18.1.3/src/compiler/glsl/glsl_lexer.ll mesa-18.1.5/src/compiler/glsl/glsl_lexer.ll --- mesa-18.1.3/src/compiler/glsl/glsl_lexer.ll 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/compiler/glsl/glsl_lexer.ll 2018-07-27 13:52:17.000000000 +0000 @@ -627,9 +627,9 @@ fvec2 KEYWORD(110, 100, 0, 0, FVEC2); fvec3 KEYWORD(110, 100, 0, 0, FVEC3); fvec4 KEYWORD(110, 100, 0, 0, FVEC4); -sampler2DRect DEPRECATED_ES_TYPE_WITH_ALT(yyextra->ARB_texture_rectangle_enable, 
glsl_type::sampler2DRect_type); +sampler2DRect TYPE_WITH_ALT(110, 100, 0, 0, yyextra->ARB_texture_rectangle_enable, glsl_type::sampler2DRect_type); sampler3DRect KEYWORD(110, 100, 0, 0, SAMPLER3DRECT); -sampler2DRectShadow DEPRECATED_ES_TYPE_WITH_ALT(yyextra->ARB_texture_rectangle_enable, glsl_type::sampler2DRectShadow_type); +sampler2DRectShadow TYPE_WITH_ALT(110, 100, 0, 0, yyextra->ARB_texture_rectangle_enable, glsl_type::sampler2DRectShadow_type); sizeof KEYWORD(110, 100, 0, 0, SIZEOF); cast KEYWORD(110, 100, 0, 0, CAST); namespace KEYWORD(110, 100, 0, 0, NAMESPACE); diff -Nru mesa-18.1.3/src/compiler/glsl/opt_algebraic.cpp mesa-18.1.5/src/compiler/glsl/opt_algebraic.cpp --- mesa-18.1.3/src/compiler/glsl/opt_algebraic.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/compiler/glsl/opt_algebraic.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -709,6 +709,12 @@ if (!is_vec_zero(zero)) continue; + /* We are allowed to add scalars with a vector or matrix. In that + * case lets just exit early. 
+ */ + if (add->operands[0]->type != add->operands[1]->type) + continue; + /* Depending of the zero position we want to optimize * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y) */ diff -Nru mesa-18.1.3/src/compiler/glsl/serialize.cpp mesa-18.1.5/src/compiler/glsl/serialize.cpp --- mesa-18.1.3/src/compiler/glsl/serialize.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/compiler/glsl/serialize.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -1044,6 +1044,7 @@ blob_write_bytes(metadata, glprog->sh.SamplerTargets, sizeof(glprog->sh.SamplerTargets)); blob_write_uint32(metadata, glprog->ShadowSamplers); + blob_write_uint32(metadata, glprog->ExternalSamplersUsed); blob_write_bytes(metadata, glprog->sh.ImageAccess, sizeof(glprog->sh.ImageAccess)); @@ -1096,6 +1097,7 @@ blob_copy_bytes(metadata, (uint8_t *) glprog->sh.SamplerTargets, sizeof(glprog->sh.SamplerTargets)); glprog->ShadowSamplers = blob_read_uint32(metadata); + glprog->ExternalSamplersUsed = blob_read_uint32(metadata); blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageAccess, sizeof(glprog->sh.ImageAccess)); diff -Nru mesa-18.1.3/src/compiler/nir/nir_loop_analyze.c mesa-18.1.5/src/compiler/nir/nir_loop_analyze.c --- mesa-18.1.3/src/compiler/nir/nir_loop_analyze.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/compiler/nir/nir_loop_analyze.c 2018-07-27 13:52:17.000000000 +0000 @@ -341,8 +341,8 @@ nir_loop_terminator *terminator = rzalloc(state->loop->info, nir_loop_terminator); - list_add(&terminator->loop_terminator_link, - &state->loop->info->loop_terminator_list); + list_addtail(&terminator->loop_terminator_link, + &state->loop->info->loop_terminator_list); terminator->nif = nif; terminator->break_block = break_blk; diff -Nru mesa-18.1.3/src/compiler/nir/nir_opt_if.c mesa-18.1.5/src/compiler/nir/nir_opt_if.c --- mesa-18.1.3/src/compiler/nir/nir_opt_if.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/compiler/nir/nir_opt_if.c 2018-07-27 13:52:17.000000000 +0000 @@ -25,6 
+25,28 @@ #include "nir_control_flow.h" /** + * Gets the single block that jumps back to the loop header. Already assumes + * there is exactly one such block. + */ +static nir_block* +find_continue_block(nir_loop *loop) +{ + nir_block *header_block = nir_loop_first_block(loop); + nir_block *prev_block = + nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); + + assert(header_block->predecessors->entries == 2); + + struct set_entry *pred_entry; + set_foreach(header_block->predecessors, pred_entry) { + if (pred_entry->key != prev_block) + return (nir_block*)pred_entry->key; + } + + unreachable("Continue block not found!"); +} + +/** * This optimization detects if statements at the tops of loops where the * condition is a phi node of two constants and moves half of the if to above * the loop and the other half of the if to the end of the loop. A simple for @@ -95,12 +117,7 @@ if (header_block->predecessors->entries != 2) return false; - nir_block *continue_block = NULL; - struct set_entry *pred_entry; - set_foreach(header_block->predecessors, pred_entry) { - if (pred_entry->key != prev_block) - continue_block = (void *)pred_entry->key; - } + nir_block *continue_block = find_continue_block(loop); nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node); if (!if_node || if_node->type != nir_cf_node_if) @@ -191,6 +208,10 @@ nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node)); nir_cf_reinsert(&header, nir_after_block_before_jump(continue_block)); + + /* Get continue block again as the previous reinsert might have removed the block. 
*/ + continue_block = find_continue_block(loop); + nir_cf_extract(&tmp, nir_before_cf_list(continue_list), nir_after_cf_list(continue_list)); nir_cf_reinsert(&tmp, nir_after_block_before_jump(continue_block)); diff -Nru mesa-18.1.3/src/compiler/nir/nir_opt_loop_unroll.c mesa-18.1.5/src/compiler/nir/nir_opt_loop_unroll.c --- mesa-18.1.3/src/compiler/nir/nir_opt_loop_unroll.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/compiler/nir/nir_opt_loop_unroll.c 2018-07-27 13:52:17.000000000 +0000 @@ -530,14 +530,14 @@ if (num_lt == 2) { bool limiting_term_second = true; nir_loop_terminator *terminator = - list_last_entry(&loop->info->loop_terminator_list, + list_first_entry(&loop->info->loop_terminator_list, nir_loop_terminator, loop_terminator_link); if (terminator->nif == loop->info->limiting_terminator->nif) { limiting_term_second = false; terminator = - list_first_entry(&loop->info->loop_terminator_list, + list_last_entry(&loop->info->loop_terminator_list, nir_loop_terminator, loop_terminator_link); } diff -Nru mesa-18.1.3/src/compiler/nir/nir_print.c mesa-18.1.5/src/compiler/nir/nir_print.c --- mesa-18.1.3/src/compiler/nir/nir_print.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/compiler/nir/nir_print.c 2018-07-27 13:52:17.000000000 +0000 @@ -87,6 +87,7 @@ static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4", "error", "error", "error", "vec8", + "error", "error", "error", "error", "error", "error", "error", "vec16"}; static void diff -Nru mesa-18.1.3/src/compiler/nir/nir_serialize.c mesa-18.1.5/src/compiler/nir/nir_serialize.c --- mesa-18.1.3/src/compiler/nir/nir_serialize.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/compiler/nir/nir_serialize.c 2018-07-27 13:52:17.000000000 +0000 @@ -124,7 +124,7 @@ blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values)); c->num_elements = blob_read_uint32(ctx->blob); - c->elements = ralloc_array(ctx->nir, nir_constant *, c->num_elements); + c->elements = 
ralloc_array(nvar, nir_constant *, c->num_elements); for (unsigned i = 0; i < c->num_elements; i++) c->elements[i] = read_constant(ctx, nvar); diff -Nru mesa-18.1.3/src/compiler/spirv/spirv_to_nir.c mesa-18.1.5/src/compiler/spirv/spirv_to_nir.c --- mesa-18.1.3/src/compiler/spirv/spirv_to_nir.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/compiler/spirv/spirv_to_nir.c 2018-07-27 13:52:17.000000000 +0000 @@ -2278,6 +2278,18 @@ return nir_swizzle(&b->nb, coord->def, swizzle, 4, false); } +static nir_ssa_def * +expand_to_vec4(nir_builder *b, nir_ssa_def *value) +{ + if (value->num_components == 4) + return value; + + unsigned swiz[4]; + for (unsigned i = 0; i < 4; i++) + swiz[i] = i < value->num_components ? i : 0; + return nir_swizzle(b, value, swiz, 4, false); +} + static void vtn_handle_image(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -2391,11 +2403,7 @@ /* The image coordinate is always 4 components but we may not have that * many. Swizzle to compensate. */ - unsigned swiz[4]; - for (unsigned i = 0; i < 4; i++) - swiz[i] = i < image.coord->num_components ? i : 0; - intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord, - swiz, 4, false)); + intrin->src[0] = nir_src_for_ssa(expand_to_vec4(&b->nb, image.coord)); intrin->src[1] = nir_src_for_ssa(image.sample); } @@ -2405,11 +2413,13 @@ case SpvOpImageRead: break; case SpvOpAtomicStore: - intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def); - break; - case SpvOpImageWrite: - intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); + case SpvOpImageWrite: { + const uint32_t value_id = opcode == SpvOpAtomicStore ? 
w[4] : w[3]; + nir_ssa_def *value = vtn_ssa_value(b, value_id)->def; + /* nir_intrinsic_image_deref_store always takes a vec4 value */ + intrin->src[2] = nir_src_for_ssa(expand_to_vec4(&b->nb, value)); break; + } case SpvOpAtomicCompareExchange: case SpvOpAtomicIIncrement: @@ -2431,23 +2441,26 @@ vtn_fail("Invalid image opcode"); } - if (opcode != SpvOpImageWrite) { + if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - unsigned dest_components = nir_intrinsic_dest_components(intrin); - if (intrin->intrinsic == nir_intrinsic_image_var_size) { - dest_components = intrin->num_components = - glsl_get_vector_elements(type->type); - } + unsigned dest_components = glsl_get_vector_elements(type->type); + intrin->num_components = nir_intrinsic_infos[op].dest_components; + if (intrin->num_components == 0) + intrin->num_components = dest_components; nir_ssa_dest_init(&intrin->instr, &intrin->dest, - dest_components, 32, NULL); + intrin->num_components, 32, NULL); nir_builder_instr_insert(&b->nb, &intrin->instr); + nir_ssa_def *result = &intrin->dest.ssa; + if (intrin->num_components != dest_components) + result = nir_channels(&b->nb, result, (1 << dest_components) - 1); + val->ssa = vtn_create_ssa_value(b, type->type); - val->ssa->def = &intrin->dest.ssa; + val->ssa->def = result; } else { nir_builder_instr_insert(&b->nb, &intrin->instr); } diff -Nru mesa-18.1.3/src/egl/main/eglapi.c mesa-18.1.5/src/egl/main/eglapi.c --- mesa-18.1.3/src/egl/main/eglapi.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/egl/main/eglapi.c 2018-07-27 13:52:17.000000000 +0000 @@ -1320,9 +1320,7 @@ } /** - * If the width of the passed rect is greater than the surface's - * width then it is clamped to the width of the surface. Same with - * height. + * Clamp the rectangles so that they lie within the surface. 
*/ static void @@ -1334,17 +1332,16 @@ EGLint surf_width = surf->Width; for (i = 0; i < (4 * n_rects); i += 4) { - EGLint x, y, rect_width, rect_height; - x = rects[i]; - y = rects[i + 1]; - rect_width = rects[i + 2]; - rect_height = rects[i + 3]; - - if (rect_width > surf_width - x) - rects[i + 2] = surf_width - x; - - if (rect_height > surf_height - y) - rects[i + 3] = surf_height - y; + EGLint x1, y1, x2, y2; + x1 = rects[i]; + y1 = rects[i + 1]; + x2 = rects[i + 2] + x1; + y2 = rects[i + 3] + y1; + + rects[i] = CLAMP(x1, 0, surf_width); + rects[i + 1] = CLAMP(y1, 0, surf_height); + rects[i + 2] = CLAMP(x2, 0, surf_width) - rects[i]; + rects[i + 3] = CLAMP(y2, 0, surf_height) - rects[i + 1]; } } diff -Nru mesa-18.1.3/src/egl/Makefile.am mesa-18.1.5/src/egl/Makefile.am --- mesa-18.1.3/src/egl/Makefile.am 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/egl/Makefile.am 2018-07-27 13:52:17.000000000 +0000 @@ -80,6 +80,7 @@ if HAVE_PLATFORM_WAYLAND drivers/dri2/linux-dmabuf-unstable-v1-protocol.lo: drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h drivers/dri2/egl_dri2.lo: drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h +drivers/dri2/platform_wayland.lo: drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h AM_CFLAGS += $(WAYLAND_CLIENT_CFLAGS) libEGL_common_la_LIBADD += $(WAYLAND_CLIENT_LIBS) diff -Nru mesa-18.1.3/src/egl/Makefile.in mesa-18.1.5/src/egl/Makefile.in --- mesa-18.1.3/src/egl/Makefile.in 2018-06-29 17:47:38.000000000 +0000 +++ mesa-18.1.5/src/egl/Makefile.in 2018-07-27 13:52:26.000000000 +0000 @@ -1655,6 +1655,7 @@ @HAVE_PLATFORM_WAYLAND_TRUE@drivers/dri2/linux-dmabuf-unstable-v1-protocol.lo: drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h @HAVE_PLATFORM_WAYLAND_TRUE@drivers/dri2/egl_dri2.lo: drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h +@HAVE_PLATFORM_WAYLAND_TRUE@drivers/dri2/platform_wayland.lo: drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h g_egldispatchstubs.c: $(GLVND_GEN_DEPS) $(PYTHON_GEN) 
$(top_srcdir)/src/egl/generate/gen_egl_dispatch.py source \ $(top_srcdir)/src/egl/generate/eglFunctionList.py \ diff -Nru mesa-18.1.3/src/gallium/auxiliary/draw/draw_pt_emit.c mesa-18.1.5/src/gallium/auxiliary/draw/draw_pt_emit.c --- mesa-18.1.3/src/gallium/auxiliary/draw/draw_pt_emit.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/auxiliary/draw/draw_pt_emit.c 2018-07-27 13:52:17.000000000 +0000 @@ -158,6 +158,7 @@ */ render->set_primitive(draw->render, prim_info->prim); + assert(vertex_count <= 65535); render->allocate_vertices(render, (ushort)translate->key.output_stride, (ushort)vertex_count); @@ -229,6 +230,7 @@ */ render->set_primitive(draw->render, prim_info->prim); + assert(count <= 65535); if (!render->allocate_vertices(render, (ushort)translate->key.output_stride, (ushort)count)) diff -Nru mesa-18.1.3/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c mesa-18.1.5/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c --- mesa-18.1.3/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c 2018-07-27 13:52:17.000000000 +0000 @@ -299,6 +299,16 @@ FREE(vert_info->verts); vert_info = &gs_vert_info; prim_info = &gs_prim_info; + + /* + * pt emit can only handle ushort number of vertices (see + * render->allocate_vertices). + * vsplit guarantees there's never more than 4096, however GS can + * easily blow this up (by a factor of 256 (or even 1024) max). 
+ */ + if (vert_info->count > 65535) { + opt |= PT_PIPELINE; + } } else { if (draw_prim_assembler_is_required(draw, prim_info, vert_info)) { draw_prim_assembler_run(draw, prim_info, vert_info, diff -Nru mesa-18.1.3/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c mesa-18.1.5/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c --- mesa-18.1.3/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c 2018-07-27 13:52:17.000000000 +0000 @@ -428,6 +428,15 @@ FREE(vert_info->verts); vert_info = &gs_vert_info; prim_info = &gs_prim_info; + /* + * pt emit can only handle ushort number of vertices (see + * render->allocate_vertices). + * vsplit guarantees there's never more than 4096, however GS can + * easily blow this up (by a factor of 256 (or even 1024) max). + */ + if (vert_info->count > 65535) { + opt |= PT_PIPELINE; + } } else { if (draw_prim_assembler_is_required(draw, prim_info, vert_info)) { draw_prim_assembler_run(draw, prim_info, vert_info, diff -Nru mesa-18.1.3/src/gallium/auxiliary/driver_ddebug/dd_screen.c mesa-18.1.5/src/gallium/auxiliary/driver_ddebug/dd_screen.c --- mesa-18.1.3/src/gallium/auxiliary/driver_ddebug/dd_screen.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/auxiliary/driver_ddebug/dd_screen.c 2018-07-27 13:52:17.000000000 +0000 @@ -284,7 +284,8 @@ { struct pipe_screen *screen = dd_screen(_screen)->screen; - screen->resource_changed(screen, res); + if (screen->resource_changed) + screen->resource_changed(screen, res); } static void diff -Nru mesa-18.1.3/src/gallium/auxiliary/driver_rbug/rbug_screen.c mesa-18.1.5/src/gallium/auxiliary/driver_rbug/rbug_screen.c --- mesa-18.1.3/src/gallium/auxiliary/driver_rbug/rbug_screen.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/auxiliary/driver_rbug/rbug_screen.c 2018-07-27 13:52:17.000000000 +0000 @@ -222,7 +222,8 @@ struct 
pipe_screen *screen = rb_screen->screen; struct pipe_resource *resource = rb_resource->resource; - screen->resource_changed(screen, resource); + if (screen->resource_changed) + screen->resource_changed(screen, resource); } static void diff -Nru mesa-18.1.3/src/gallium/auxiliary/driver_trace/tr_screen.c mesa-18.1.5/src/gallium/auxiliary/driver_trace/tr_screen.c --- mesa-18.1.3/src/gallium/auxiliary/driver_trace/tr_screen.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/auxiliary/driver_trace/tr_screen.c 2018-07-27 13:52:17.000000000 +0000 @@ -444,7 +444,8 @@ trace_dump_arg(ptr, screen); trace_dump_arg(ptr, resource); - screen->resource_changed(screen, resource); + if (screen->resource_changed) + screen->resource_changed(screen, resource); trace_dump_call_end(); } diff -Nru mesa-18.1.3/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp mesa-18.1.5/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp --- mesa-18.1.3/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -415,6 +415,7 @@ TargetNVC0::insnCanLoadOffset(const Instruction *insn, int s, int offset) const { const ValueRef& ref = insn->src(s); + offset += insn->src(s).get()->reg.data.offset; if (ref.getFile() == FILE_MEMORY_CONST && (insn->op != OP_LOAD || insn->subOp != NV50_IR_SUBOP_LDC_IS)) return offset >= -0x8000 && offset < 0x8000; diff -Nru mesa-18.1.3/src/gallium/drivers/r600/r600_shader.c mesa-18.1.5/src/gallium/drivers/r600/r600_shader.c --- mesa-18.1.3/src/gallium/drivers/r600/r600_shader.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/drivers/r600/r600_shader.c 2018-07-27 13:52:17.000000000 +0000 @@ -1673,19 +1673,11 @@ } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSOUTER) { r600_src->sel = 2; } else if 
(ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTICESIN) { - if (ctx->type == PIPE_SHADER_TESS_CTRL) { - r600_src->sel = ctx->tess_input_info; - r600_src->swizzle[0] = 2; - r600_src->swizzle[1] = 2; - r600_src->swizzle[2] = 2; - r600_src->swizzle[3] = 2; - } else { - r600_src->sel = ctx->tess_input_info; - r600_src->swizzle[0] = 3; - r600_src->swizzle[1] = 3; - r600_src->swizzle[2] = 3; - r600_src->swizzle[3] = 3; - } + r600_src->sel = ctx->tess_input_info; + r600_src->swizzle[0] = 2; + r600_src->swizzle[1] = 2; + r600_src->swizzle[2] = 2; + r600_src->swizzle[3] = 2; } else if (ctx->type == PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { r600_src->sel = 0; r600_src->swizzle[0] = 0; @@ -3559,7 +3551,7 @@ ctx.tess_input_info = ++regno; ctx.tess_output_info = ++regno; } else if (ctx.type == PIPE_SHADER_TESS_EVAL) { - ctx.tess_input_info = 0; + ctx.tess_input_info = ++regno; ctx.tess_output_info = ++regno; } else if (ctx.type == PIPE_SHADER_GEOMETRY) { ctx.gs_export_gpr_tregs[0] = ++regno; diff -Nru mesa-18.1.3/src/gallium/drivers/r600/sb/sb_expr.cpp mesa-18.1.5/src/gallium/drivers/r600/sb/sb_expr.cpp --- mesa-18.1.3/src/gallium/drivers/r600/sb/sb_expr.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/drivers/r600/sb/sb_expr.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -945,6 +945,8 @@ if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) { if (fold_assoc(&n)) return true; + if (n.src.size() < 3) + return fold_alu_op2(n); } value* v0 = n.src[0]->gvalue(); diff -Nru mesa-18.1.3/src/gallium/drivers/r600/sb/sb_if_conversion.cpp mesa-18.1.5/src/gallium/drivers/r600/sb/sb_if_conversion.cpp --- mesa-18.1.3/src/gallium/drivers/r600/sb/sb_if_conversion.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/drivers/r600/sb/sb_if_conversion.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -42,16 +42,13 @@ regions_vec &rv = sh.get_regions(); unsigned 
converted = 0; - - for (regions_vec::reverse_iterator N, I = rv.rbegin(), E = rv.rend(); - I != E; I = N) { - N = I; ++N; - + for (regions_vec::reverse_iterator I = rv.rbegin(); I != rv.rend(); ) { region_node *r = *I; if (run_on(r)) { - rv.erase(I.base() - 1); + I = regions_vec::reverse_iterator(rv.erase((++I).base())); ++converted; - } + } else + ++I; } return 0; } diff -Nru mesa-18.1.3/src/gallium/drivers/radeonsi/si_blit.c mesa-18.1.5/src/gallium/drivers/radeonsi/si_blit.c --- mesa-18.1.3/src/gallium/drivers/radeonsi/si_blit.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/drivers/radeonsi/si_blit.c 2018-07-27 13:52:17.000000000 +0000 @@ -1317,9 +1317,33 @@ } /* Always do the analysis even if DCC is disabled at the moment. */ - if (rtex->dcc_gather_statistics && rtex->separate_dcc_dirty) { - rtex->separate_dcc_dirty = false; - vi_separate_dcc_process_and_reset_stats(ctx, rtex); + if (rtex->dcc_gather_statistics) { + bool separate_dcc_dirty = rtex->separate_dcc_dirty; + + /* If the color buffer hasn't been unbound and fast clear hasn't + * been used, separate_dcc_dirty is false, but there may have been + * new rendering. Check if the color buffer is bound and assume + * it's dirty. + * + * Note that DRI2 never unbinds window colorbuffers, which means + * the DCC pipeline statistics query would never be re-set and would + * keep adding new results until all free memory is exhausted if we + * didn't do this. 
+ */ + if (!separate_dcc_dirty) { + for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { + if (sctx->framebuffer.state.cbufs[i] && + sctx->framebuffer.state.cbufs[i]->texture == res) { + separate_dcc_dirty = true; + break; + } + } + } + + if (separate_dcc_dirty) { + rtex->separate_dcc_dirty = false; + vi_separate_dcc_process_and_reset_stats(ctx, rtex); + } } } diff -Nru mesa-18.1.3/src/gallium/drivers/radeonsi/si_compute.c mesa-18.1.5/src/gallium/drivers/radeonsi/si_compute.c --- mesa-18.1.3/src/gallium/drivers/radeonsi/si_compute.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/drivers/radeonsi/si_compute.c 2018-07-27 13:52:17.000000000 +0000 @@ -221,6 +221,12 @@ const amd_kernel_code_t *code_object = si_compute_get_code_object(program, 0); code_object_to_config(code_object, &program->shader.config); + if (program->shader.binary.reloc_count != 0) { + fprintf(stderr, "Error: %d unsupported relocations\n", + program->shader.binary.reloc_count); + FREE(program); + return NULL; + } } else { si_shader_binary_read_config(&program->shader.binary, &program->shader.config, 0); diff -Nru mesa-18.1.3/src/gallium/drivers/vc4/vc4_resource.c mesa-18.1.5/src/gallium/drivers/vc4/vc4_resource.c --- mesa-18.1.3/src/gallium/drivers/vc4/vc4_resource.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/drivers/vc4/vc4_resource.c 2018-07-27 13:52:17.000000000 +0000 @@ -161,7 +161,7 @@ */ if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && - !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) && + !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && prsc->last_level == 0 && prsc->width0 == box->width && prsc->height0 == box->height && diff -Nru mesa-18.1.3/src/gallium/drivers/virgl/virgl_encode.c mesa-18.1.5/src/gallium/drivers/virgl/virgl_encode.c --- mesa-18.1.3/src/gallium/drivers/virgl/virgl_encode.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/drivers/virgl/virgl_encode.c 2018-07-27 
13:52:17.000000000 +0000 @@ -524,7 +524,7 @@ left_bytes = size; while (left_bytes) { - if (ctx->cbuf->cdw + 12 > VIRGL_MAX_CMDBUF_DWORDS) + if (ctx->cbuf->cdw + 12 >= VIRGL_MAX_CMDBUF_DWORDS) ctx->base.flush(&ctx->base, NULL, 0); thispass = (VIRGL_MAX_CMDBUF_DWORDS - ctx->cbuf->cdw - 12) * 4; diff -Nru mesa-18.1.3/src/gallium/state_trackers/clover/core/event.cpp mesa-18.1.5/src/gallium/state_trackers/clover/core/event.cpp --- mesa-18.1.3/src/gallium/state_trackers/clover/core/event.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/state_trackers/clover/core/event.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -49,12 +49,14 @@ } void -event::trigger() { +event::trigger() try { if (wait_count() == 1) action_ok(*this); for (event &ev : trigger_self()) ev.trigger(); +} catch (error &e) { + abort(e.get()); } std::vector> diff -Nru mesa-18.1.3/src/gallium/state_trackers/clover/core/kernel.cpp mesa-18.1.5/src/gallium/state_trackers/clover/core/kernel.cpp --- mesa-18.1.3/src/gallium/state_trackers/clover/core/kernel.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/state_trackers/clover/core/kernel.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -231,6 +231,10 @@ cs.req_local_mem = mem_local; cs.req_input_mem = input.size(); st = q->pipe->create_compute_state(q->pipe, &cs); + if (!st) { + unbind(); // Cleanup + throw error(CL_OUT_OF_RESOURCES); + } } return st; diff -Nru mesa-18.1.3/src/gallium/state_trackers/dri/dri_helpers.c mesa-18.1.5/src/gallium/state_trackers/dri/dri_helpers.c --- mesa-18.1.3/src/gallium/state_trackers/dri/dri_helpers.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/state_trackers/dri/dri_helpers.c 2018-07-27 13:52:17.000000000 +0000 @@ -214,6 +214,12 @@ struct pipe_context *ctx = dri_context(_ctx)->st->pipe; struct dri2_fence *fence = (struct dri2_fence*)_fence; + /* We might be called here with a NULL fence as a result of WaitSyncKHR + * on a EGL_KHR_reusable_sync fence. Nothing to do here in such case. 
+ */ + if (!fence) + return; + if (ctx->fence_server_sync) ctx->fence_server_sync(ctx, fence->pipe_fence); } diff -Nru mesa-18.1.3/src/gallium/state_trackers/xvmc/meson.build mesa-18.1.5/src/gallium/state_trackers/xvmc/meson.build --- mesa-18.1.3/src/gallium/state_trackers/xvmc/meson.build 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/state_trackers/xvmc/meson.build 2018-07-27 13:52:17.000000000 +0000 @@ -27,31 +27,27 @@ ) # These tests will not work without a working xvmc configuration. -if with_tests +if with_tools.contains('xvmc') dep_xvmcw = cc.find_library('XvMCW') dep_real_xvmc = dependency('xvmc') foreach x : ['context', 'surface', 'subpicture', 'blocks', 'rendering'] _name = 'xvmc_@0@'.format(x) - test( - _name, - executable( - _name, - files('tests/test_@0@.c'.format(x), 'tests/testlib.c'), - dependencies : [ - dep_real_xvmc, dep_x11_xcb, dep_xcb, dep_xcb_dri2, dep_xcb_dri3, dep_xvmcw, - ], - ) - ) - endforeach - - test( - 'xvmc_bench', executable( - 'xvmc_bench', - files('tests/xvmc_bench.c', 'tests/testlib.c'), + _name, + files('tests/test_@0@.c'.format(x), 'tests/testlib.c'), dependencies : [ dep_real_xvmc, dep_x11_xcb, dep_xcb, dep_xcb_dri2, dep_xcb_dri3, dep_xvmcw, ], + install: true, ) + endforeach + + executable( + 'xvmc_bench', + files('tests/xvmc_bench.c', 'tests/testlib.c'), + dependencies : [ + dep_real_xvmc, dep_x11_xcb, dep_xcb, dep_xcb_dri2, dep_xcb_dri3, dep_xvmcw, + ], + install: true, ) endif diff -Nru mesa-18.1.3/src/gallium/tests/unit/u_format_test.c mesa-18.1.5/src/gallium/tests/unit/u_format_test.c --- mesa-18.1.3/src/gallium/tests/unit/u_format_test.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/gallium/tests/unit/u_format_test.c 2018-07-27 13:52:17.000000000 +0000 @@ -380,11 +380,6 @@ if (util_is_double_nan(test->unpacked[0][0][0])) success = TRUE; - /* Ignore S3TC errors */ - if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { - success = TRUE; - } - if (!success) { 
print_unpacked_rgba_8unorm(format_desc, "FAILED: ", unpacked, " obtained\n"); print_unpacked_rgba_8unorm(format_desc, " ", expected, " expected\n"); diff -Nru mesa-18.1.3/src/glx/glxcurrent.c mesa-18.1.5/src/glx/glxcurrent.c --- mesa-18.1.3/src/glx/glxcurrent.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/glx/glxcurrent.c 2018-07-27 13:52:17.000000000 +0000 @@ -194,6 +194,13 @@ return True; } + /* can't have only one be 0 */ + if (!!draw != !!read) { + __glXUnlock(); + __glXSendError(dpy, BadMatch, None, X_GLXMakeContextCurrent, True); + return False; + } + if (oldGC != &dummyContext) { if (--oldGC->thread_refcount == 0) { oldGC->vtable->unbind(oldGC, gc); diff -Nru mesa-18.1.3/src/intel/blorp/blorp_blit.c mesa-18.1.5/src/intel/blorp/blorp_blit.c --- mesa-18.1.3/src/intel/blorp/blorp_blit.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/blorp/blorp_blit.c 2018-07-27 13:52:17.000000000 +0000 @@ -1155,6 +1155,20 @@ key->dst_layout); } + nir_ssa_def *comp = NULL; + if (key->dst_rgb) { + /* The destination image is bound as a red texture three times as wide + * as the actual image. Our shader is effectively running one color + * component at a time. We need to save off the component and adjust + * the destination position. + */ + assert(dst_pos->num_components == 2); + nir_ssa_def *dst_x = nir_channel(&b, dst_pos, 0); + comp = nir_umod(&b, dst_x, nir_imm_int(&b, 3)); + dst_pos = nir_vec2(&b, nir_idiv(&b, dst_x, nir_imm_int(&b, 3)), + nir_channel(&b, dst_pos, 1)); + } + /* Now (X, Y, S) = decode_msaa(dst_samples, detile(dst_tiling, offset)). * * That is: X, Y and S now contain the true coordinates and sample index of @@ -1285,8 +1299,6 @@ * from the source color and write that to destination red. 
*/ assert(dst_pos->num_components == 2); - nir_ssa_def *comp = - nir_umod(&b, nir_channel(&b, dst_pos, 0), nir_imm_int(&b, 3)); nir_ssa_def *color_component = nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 0)), @@ -1547,18 +1559,15 @@ struct blt_axis x, y; }; -static void +void surf_fake_rgb_with_red(const struct isl_device *isl_dev, - struct brw_blorp_surface_info *info, - uint32_t *x, uint32_t *width) + struct brw_blorp_surface_info *info) { blorp_surf_convert_to_single_slice(isl_dev, info); info->surf.logical_level0_px.width *= 3; info->surf.phys_level0_sa.width *= 3; info->tile_x_sa *= 3; - *x *= 3; - *width *= 3; enum isl_format red_format; switch (info->view.format) { @@ -1588,28 +1597,6 @@ info->surf.format = info->view.format = red_format; } -static void -fake_dest_rgb_with_red(const struct isl_device *dev, - struct blorp_params *params, - struct brw_blorp_blit_prog_key *wm_prog_key, - struct blt_coords *coords) -{ - /* Handle RGB destinations for blorp_copy */ - const struct isl_format_layout *dst_fmtl = - isl_format_get_layout(params->dst.surf.format); - - if (dst_fmtl->bpb % 3 == 0) { - uint32_t dst_x = coords->x.dst0; - uint32_t dst_width = coords->x.dst1 - dst_x; - surf_fake_rgb_with_red(dev, ¶ms->dst, - &dst_x, &dst_width); - coords->x.dst0 = dst_x; - coords->x.dst1 = dst_x + dst_width; - wm_prog_key->dst_rgb = true; - wm_prog_key->need_dst_offset = true; - } -} - enum blit_shrink_status { BLIT_NO_SHRINK = 0, BLIT_WIDTH_SHRINK = 1, @@ -1628,8 +1615,6 @@ { const struct gen_device_info *devinfo = batch->blorp->isl_dev->info; - fake_dest_rgb_with_red(batch->blorp->isl_dev, params, wm_prog_key, coords); - if (isl_format_has_sint_channel(params->src.view.format)) { wm_prog_key->texture_data_type = nir_type_int; } else if (isl_format_has_uint_channel(params->src.view.format)) { @@ -1835,6 +1820,21 @@ params->src.view.base_level); } + if (isl_format_get_layout(params->dst.view.format)->bpb % 3 == 0) { + /* We can't render to RGB formats natively because 
they aren't a + * power-of-two size. Instead, we fake them by using a red format + * with the same channel type and size and emitting shader code to + * only write one channel at a time. + */ + params->x0 *= 3; + params->x1 *= 3; + + surf_fake_rgb_with_red(batch->blorp->isl_dev, ¶ms->dst); + + wm_prog_key->dst_rgb = true; + wm_prog_key->need_dst_offset = true; + } + if (params->src.tile_x_sa || params->src.tile_y_sa) { assert(wm_prog_key->need_src_offset); surf_get_intratile_offset_px(¶ms->src, diff -Nru mesa-18.1.3/src/intel/blorp/blorp_clear.c mesa-18.1.5/src/intel/blorp/blorp_clear.c --- mesa-18.1.3/src/intel/blorp/blorp_clear.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/blorp/blorp_clear.c 2018-07-27 13:52:17.000000000 +0000 @@ -38,17 +38,20 @@ { enum blorp_shader_type shader_type; /* Must be BLORP_SHADER_TYPE_CLEAR */ bool use_simd16_replicated_data; + bool clear_rgb_as_red; bool pad[3]; }; static bool blorp_params_get_clear_kernel(struct blorp_context *blorp, struct blorp_params *params, - bool use_replicated_data) + bool use_replicated_data, + bool clear_rgb_as_red) { const struct brw_blorp_const_color_prog_key blorp_key = { .shader_type = BLORP_SHADER_TYPE_CLEAR, .use_simd16_replicated_data = use_replicated_data, + .clear_rgb_as_red = clear_rgb_as_red, }; if (blorp->lookup_shader(blorp, &blorp_key, sizeof(blorp_key), @@ -63,13 +66,34 @@ nir_variable *v_color = BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type()); + nir_ssa_def *color = nir_load_var(&b, v_color); + + if (clear_rgb_as_red) { + nir_variable *frag_coord = + nir_variable_create(b.shader, nir_var_shader_in, + glsl_vec4_type(), "gl_FragCoord"); + frag_coord->data.location = VARYING_SLOT_POS; + frag_coord->data.origin_upper_left = true; + + nir_ssa_def *pos = nir_f2i32(&b, nir_load_var(&b, frag_coord)); + nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, pos, 0), + nir_imm_int(&b, 3)); + nir_ssa_def *color_component = + nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 
0)), + nir_channel(&b, color, 0), + nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 1)), + nir_channel(&b, color, 1), + nir_channel(&b, color, 2))); + + nir_ssa_def *u = nir_ssa_undef(&b, 1, 32); + color = nir_vec4(&b, color_component, u, u, u); + } nir_variable *frag_color = nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(), "gl_FragColor"); frag_color->data.location = FRAG_RESULT_COLOR; - - nir_copy_var(&b, frag_color, v_color); + nir_store_var(&b, frag_color, color, 0xf); struct brw_wm_prog_key wm_key; brw_blorp_init_wm_prog_key(&wm_key); @@ -327,7 +351,7 @@ get_fast_clear_rect(batch->blorp->isl_dev, surf->aux_surf, ¶ms.x0, ¶ms.y0, ¶ms.x1, ¶ms.y1); - if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, true)) + if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, true, false)) return; brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, level, @@ -378,6 +402,7 @@ clear_color = swizzle_color_value(clear_color, swizzle); swizzle = ISL_SWIZZLE_IDENTITY; + bool clear_rgb_as_red = false; if (format == ISL_FORMAT_R9G9B9E5_SHAREDEXP) { clear_color.u32[0] = float3_to_rgb9e5(clear_color.f32); format = ISL_FORMAT_R32_UINT; @@ -391,6 +416,13 @@ const struct isl_swizzle ARGB = ISL_SWIZZLE(ALPHA, RED, GREEN, BLUE); clear_color = swizzle_color_value(clear_color, ARGB); format = ISL_FORMAT_B4G4R4A4_UNORM; + } else if (isl_format_get_layout(format)->bpb % 3 == 0) { + clear_rgb_as_red = true; + if (format == ISL_FORMAT_R8G8B8_UNORM_SRGB) { + clear_color.f32[0] = util_format_linear_to_srgb_float(clear_color.f32[0]); + clear_color.f32[1] = util_format_linear_to_srgb_float(clear_color.f32[1]); + clear_color.f32[2] = util_format_linear_to_srgb_float(clear_color.f32[2]); + } } memcpy(¶ms.wm_inputs.clear_color, clear_color.f32, sizeof(float) * 4); @@ -422,7 +454,8 @@ } if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, - use_simd16_replicated_data)) + use_simd16_replicated_data, + clear_rgb_as_red)) return; if (!blorp_ensure_sf_program(batch->blorp, ¶ms)) @@ 
-455,6 +488,12 @@ blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, &params.dst); } + if (clear_rgb_as_red) { + surf_fake_rgb_with_red(batch->blorp->isl_dev, &params.dst); + params.x0 *= 3; + params.x1 *= 3; + } + if (isl_format_is_compressed(params.dst.surf.format)) { blorp_surf_convert_to_uncompressed(batch->blorp->isl_dev, &params.dst, NULL, NULL, NULL, NULL); @@ -480,7 +519,46 @@ * 512 but a maximum 3D texture size is much larger. */ params.num_layers = MIN2(params.dst.view.array_len, num_layers); - batch->blorp->exec(batch, &params); + + const unsigned max_image_width = 16 * 1024; + if (params.dst.surf.logical_level0_px.width > max_image_width) { + /* Clearing an RGB image as red multiplies the surface width by 3 + * so it may now be too wide for the hardware surface limits. We + * have to break the clear up into pieces in order to clear wide + * images. + */ + assert(clear_rgb_as_red); + assert(params.dst.surf.dim == ISL_SURF_DIM_2D); + assert(params.dst.surf.tiling == ISL_TILING_LINEAR); + assert(params.dst.surf.logical_level0_px.depth == 1); + assert(params.dst.surf.logical_level0_px.array_len == 1); + assert(params.dst.surf.levels == 1); + assert(params.dst.surf.samples == 1); + assert(params.dst.tile_x_sa == 0 || params.dst.tile_y_sa == 0); + assert(params.dst.aux_usage == ISL_AUX_USAGE_NONE); + + /* max_image_width rounded down to a multiple of 3 */ + const unsigned max_fake_rgb_width = (max_image_width / 3) * 3; + const unsigned cpp = + isl_format_get_layout(params.dst.surf.format)->bpb / 8; + + params.dst.surf.logical_level0_px.width = max_fake_rgb_width; + params.dst.surf.phys_level0_sa.width = max_fake_rgb_width; + + uint32_t orig_x0 = params.x0, orig_x1 = params.x1; + uint64_t orig_offset = params.dst.addr.offset; + for (uint32_t x = orig_x0; x < orig_x1; x += max_fake_rgb_width) { + /* Offset to the surface.
It's easy because we're linear */ + params.dst.addr.offset = orig_offset + x * cpp; + + params.x0 = 0; + params.x1 = MIN2(orig_x1 - x, max_image_width); + + batch->blorp->exec(batch, &params); + } + } else { + batch->blorp->exec(batch, &params); + } start_layer += params.num_layers; num_layers -= params.num_layers; @@ -511,7 +589,7 @@ * we disable statistics in 3DSTATE_WM. Give it the usual clear shader * to work around the issue. */ - if (!blorp_params_get_clear_kernel(batch->blorp, &params, false)) + if (!blorp_params_get_clear_kernel(batch->blorp, &params, false, false)) return; } @@ -751,7 +829,7 @@ * is tiled or not, we have to assume it may be linear. This means no * SIMD16_REPDATA for us. :-( */ - if (!blorp_params_get_clear_kernel(batch->blorp, &params, false)) + if (!blorp_params_get_clear_kernel(batch->blorp, &params, false, false)) return; } @@ -836,7 +914,7 @@ * color" message. */ - if (!blorp_params_get_clear_kernel(batch->blorp, &params, true)) + if (!blorp_params_get_clear_kernel(batch->blorp, &params, true, false)) return; batch->blorp->exec(batch, &params); @@ -1114,7 +1192,7 @@ memset(&params.wm_inputs.clear_color, 0, sizeof(params.wm_inputs.clear_color)); - if (!blorp_params_get_clear_kernel(batch->blorp, &params, true)) + if (!blorp_params_get_clear_kernel(batch->blorp, &params, true, false)) return; batch->blorp->exec(batch, &params); diff -Nru mesa-18.1.3/src/intel/blorp/blorp_priv.h mesa-18.1.5/src/intel/blorp/blorp_priv.h --- mesa-18.1.3/src/intel/blorp/blorp_priv.h 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/blorp/blorp_priv.h 2018-07-27 13:52:17.000000000 +0000 @@ -76,6 +76,9 @@ blorp_surf_convert_to_single_slice(const struct isl_device *isl_dev, struct brw_blorp_surface_info *info); void +surf_fake_rgb_with_red(const struct isl_device *isl_dev, + struct brw_blorp_surface_info *info); +void blorp_surf_convert_to_uncompressed(const struct isl_device *isl_dev, struct brw_blorp_surface_info *info, uint32_t *x, uint32_t *y, diff -Nru
mesa-18.1.3/src/intel/compiler/brw_eu_validate.c mesa-18.1.5/src/intel/compiler/brw_eu_validate.c --- mesa-18.1.3/src/intel/compiler/brw_eu_validate.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/brw_eu_validate.c 2018-07-27 13:52:17.000000000 +0000 @@ -261,6 +261,17 @@ brw_inst_src0_da_reg_nr(devinfo, inst) < 112, "send with EOT must use g112-g127"); } + + if (devinfo->gen >= 8) { + ERROR_IF(!dst_is_null(devinfo, inst) && + (brw_inst_dst_da_reg_nr(devinfo, inst) + + brw_inst_rlen(devinfo, inst) > 127) && + (brw_inst_src0_da_reg_nr(devinfo, inst) + + brw_inst_mlen(devinfo, inst) > + brw_inst_dst_da_reg_nr(devinfo, inst)), + "r127 must not be used for return address when there is " + "a src and dest overlap"); + } } return error_msg; diff -Nru mesa-18.1.3/src/intel/compiler/brw_fs_bank_conflicts.cpp mesa-18.1.5/src/intel/compiler/brw_fs_bank_conflicts.cpp --- mesa-18.1.3/src/intel/compiler/brw_fs_bank_conflicts.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/brw_fs_bank_conflicts.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -540,6 +540,18 @@ for (unsigned reg = 0; reg < 2; reg++) constrained[p.atom_of_reg(reg)] = true; + /* At Intel Broadwell PRM, vol 07, section "Instruction Set Reference", + * subsection "EUISA Instructions", Send Message (page 990): + * + * "r127 must not be used for return address when there is a src and + * dest overlap in send instruction." + * + * Register allocation ensures that, so don't move 127 around to avoid + * breaking that property. + */ + if (v->devinfo->gen >= 8) + constrained[p.atom_of_reg(127)] = true; + foreach_block_and_inst(block, fs_inst, inst, v->cfg) { /* Assume that anything referenced via fixed GRFs is baked into the * hardware's fixed-function logic and may be unsafe to move around. 
diff -Nru mesa-18.1.3/src/intel/compiler/brw_fs.cpp mesa-18.1.5/src/intel/compiler/brw_fs.cpp --- mesa-18.1.3/src/intel/compiler/brw_fs.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/brw_fs.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -2364,10 +2364,19 @@ break; if (inst->saturate) { - if (inst->dst.type != inst->src[0].type) + /* Full mixed-type saturates don't happen. However, we can end up + * with things like: + * + * mov.sat(8) g21<1>DF -1F + * + * Other mixed-size-but-same-base-type cases may also be possible. + */ + if (inst->dst.type != inst->src[0].type && + inst->dst.type != BRW_REGISTER_TYPE_DF && + inst->src[0].type != BRW_REGISTER_TYPE_F) assert(!"unimplemented: saturate mixed types"); - if (brw_saturate_immediate(inst->dst.type, + if (brw_saturate_immediate(inst->src[0].type, &inst->src[0].as_brw_reg())) { inst->saturate = false; progress = true; @@ -5588,16 +5597,49 @@ * after \p inst, inst->next is a moving target and we need to save * it off here so that we insert the zip instructions in the right * place. + * + * Since we're inserting split instructions after after_inst, the + * instructions will end up in the reverse order that we insert them. + * However, certain render target writes require that the low group + * instructions come before the high group. From the Ivy Bridge PRM + * Vol. 4, Pt. 1, Section 3.9.11: + * + * "If multiple SIMD8 Dual Source messages are delivered by the + * pixel shader thread, each SIMD8_DUALSRC_LO message must be + * issued before the SIMD8_DUALSRC_HI message with the same Slot + * Group Select setting." + * + * And, from Section 3.9.11.1 of the same PRM: + * + * "When SIMD32 or SIMD16 PS threads send render target writes + * with multiple SIMD8 and SIMD16 messages, the following must + * hold: + * + * All the slots (as described above) must have a corresponding + * render target write irrespective of the slot's validity. A slot + * is considered valid when at least one sample is enabled. 
For + * example, a SIMD16 PS thread must send two SIMD8 render target + * writes to cover all the slots. + * + * PS thread must send SIMD render target write messages with + * increasing slot numbers. For example, SIMD16 thread has + * Slot[15:0] and if two SIMD8 render target writes are used, the + * first SIMD8 render target write must send Slot[7:0] and the + * next one must send Slot[15:8]." + * + * In order to make low group instructions come before high group + * instructions (this is required for some render target writes), we + * split from the highest group to lowest. */ exec_node *const after_inst = inst->next; - for (unsigned i = 0; i < n; i++) { + for (int i = n - 1; i >= 0; i--) { /* Emit a copy of the original instruction with the lowered width. * If the EOT flag was set throw it away except for the last * instruction to avoid killing the thread prematurely. */ fs_inst split_inst = *inst; split_inst.exec_size = lower_width; - split_inst.eot = inst->eot && i == 0; + split_inst.eot = inst->eot && i == n - 1; /* Select the correct channel enables for the i-th group, then * transform the sources and destination and emit the lowered diff -Nru mesa-18.1.3/src/intel/compiler/brw_fs_nir.cpp mesa-18.1.5/src/intel/compiler/brw_fs_nir.cpp --- mesa-18.1.3/src/intel/compiler/brw_fs_nir.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/brw_fs_nir.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -67,14 +67,25 @@ vec4s[loc] = MAX2(vec4s[loc], var_vec4s); } - nir_foreach_variable(var, &nir->outputs) { - const int loc = var->data.driver_location; - if (outputs[loc].file == BAD_FILE) { - fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * vec4s[loc]); - for (unsigned i = 0; i < vec4s[loc]; i++) { - outputs[loc + i] = offset(reg, bld, 4 * i); - } + for (unsigned loc = 0; loc < ARRAY_SIZE(vec4s);) { + if (vec4s[loc] == 0) { + loc++; + continue; } + + unsigned reg_size = vec4s[loc]; + + /* Check if there are any ranges that start within this range and extend 
+ * past it. If so, include them in this allocation. + */ + for (unsigned i = 1; i < reg_size; i++) + reg_size = MAX2(vec4s[i + loc] + i, reg_size); + + fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * reg_size); + for (unsigned i = 0; i < reg_size; i++) + outputs[loc + i] = offset(reg, bld, 4 * i); + + loc += reg_size; } } @@ -810,11 +821,20 @@ case nir_op_fsign: { if (op[0].abs) { - /* Straightforward since the source can be assumed to be - * non-negative. + /* Straightforward since the source can be assumed to be either + * strictly >= 0 or strictly <= 0 depending on the setting of the + * negate flag. */ set_condmod(BRW_CONDITIONAL_NZ, bld.MOV(result, op[0])); - set_predicate(BRW_PREDICATE_NORMAL, bld.MOV(result, brw_imm_f(1.0f))); + + inst = (op[0].negate) + ? bld.MOV(result, brw_imm_f(-1.0f)) + : bld.MOV(result, brw_imm_f(1.0f)); + + set_predicate(BRW_PREDICATE_NORMAL, inst); + + if (instr->dest.saturate) + inst->saturate = true; } else if (type_sz(op[0].type) < 8) { /* AND(val, 0x80000000) gives the sign bit. diff -Nru mesa-18.1.3/src/intel/compiler/brw_fs_reg_allocate.cpp mesa-18.1.5/src/intel/compiler/brw_fs_reg_allocate.cpp --- mesa-18.1.3/src/intel/compiler/brw_fs_reg_allocate.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/brw_fs_reg_allocate.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -548,6 +548,9 @@ int first_mrf_hack_node = node_count; if (devinfo->gen >= 7) node_count += BRW_MAX_GRF - GEN7_MRF_HACK_START; + int grf127_send_hack_node = node_count; + if (devinfo->gen >= 8) + node_count ++; struct ra_graph *g = ra_alloc_interference_graph(compiler->fs_reg_sets[rsi].regs, node_count); @@ -652,6 +655,45 @@ } } } + + if (devinfo->gen >= 8) { + /* At Intel Broadwell PRM, vol 07, section "Instruction Set Reference", + * subsection "EUISA Instructions", Send Message (page 990): + * + * "r127 must not be used for return address when there is a src and + * dest overlap in send instruction." 
+ * + * We are avoiding using grf127 as part of the destination of send + * messages adding a node interference to the grf127_send_hack_node. + * This node has a fixed asignment to grf127. + * + * We don't apply it to SIMD16 because previous code avoids any register + * overlap between sources and destination. + */ + ra_set_node_reg(g, grf127_send_hack_node, 127); + if (dispatch_width == 8) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->is_send_from_grf() && inst->dst.file == VGRF) + ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node); + } + } + + if (spilled_any_registers) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { + /* Spilling instruction are genereated as SEND messages from MRF + * but as Gen7+ supports sending from GRF the driver will maps + * assingn these MRF registers to a GRF. Implementations reuses + * the dest of the send message as source. So as we will have an + * overlap for sure, we create an interference between destination + * and grf127. + */ + if ((inst->opcode == SHADER_OPCODE_GEN7_SCRATCH_READ || + inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_READ) && + inst->dst.file == VGRF) + ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node); + } + } + } /* Debug of register spilling: Go spill everything. 
*/ if (unlikely(spill_all)) { diff -Nru mesa-18.1.3/src/intel/compiler/brw_nir_analyze_ubo_ranges.c mesa-18.1.5/src/intel/compiler/brw_nir_analyze_ubo_ranges.c --- mesa-18.1.3/src/intel/compiler/brw_nir_analyze_ubo_ranges.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/brw_nir_analyze_ubo_ranges.c 2018-07-27 13:52:17.000000000 +0000 @@ -124,12 +124,29 @@ continue; nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic == nir_intrinsic_load_uniform) + switch (intrin->intrinsic) { + case nir_intrinsic_load_uniform: + case nir_intrinsic_image_var_load: + case nir_intrinsic_image_var_store: + case nir_intrinsic_image_var_atomic_add: + case nir_intrinsic_image_var_atomic_min: + case nir_intrinsic_image_var_atomic_max: + case nir_intrinsic_image_var_atomic_and: + case nir_intrinsic_image_var_atomic_or: + case nir_intrinsic_image_var_atomic_xor: + case nir_intrinsic_image_var_atomic_exchange: + case nir_intrinsic_image_var_atomic_comp_swap: + case nir_intrinsic_image_var_size: state->uses_regular_uniforms = true; - - if (intrin->intrinsic != nir_intrinsic_load_ubo) continue; + case nir_intrinsic_load_ubo: + break; /* Fall through to the analysis below */ + + default: + continue; /* Not a uniform or UBO intrinsic */ + } + nir_const_value *block_const = nir_src_as_const_value(intrin->src[0]); nir_const_value *offset_const = nir_src_as_const_value(intrin->src[1]); @@ -167,6 +184,7 @@ void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, nir_shader *nir, + const struct brw_vs_prog_key *vs_key, struct brw_ubo_range out_ranges[4]) { const struct gen_device_info *devinfo = compiler->devinfo; @@ -185,6 +203,23 @@ _mesa_hash_table_create(mem_ctx, NULL, _mesa_key_pointer_equal), }; + switch (nir->info.stage) { + case MESA_SHADER_VERTEX: + if (vs_key && vs_key->nr_userclip_plane_consts > 0) + state.uses_regular_uniforms = true; + break; + + case MESA_SHADER_COMPUTE: + /* Compute shaders use push constants to get 
the subgroup ID so it's + * best to just assume some system values are pushed. + */ + state.uses_regular_uniforms = true; + break; + + default: + break; + } + /* Walk the IR, recording how many times each UBO block/offset is used. */ nir_foreach_function(function, nir) { if (function->impl) { diff -Nru mesa-18.1.3/src/intel/compiler/brw_nir.h mesa-18.1.5/src/intel/compiler/brw_nir.h --- mesa-18.1.3/src/intel/compiler/brw_nir.h 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/brw_nir.h 2018-07-27 13:52:17.000000000 +0000 @@ -148,6 +148,7 @@ void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, nir_shader *nir, + const struct brw_vs_prog_key *vs_key, struct brw_ubo_range out_ranges[4]); bool brw_nir_opt_peephole_ffma(nir_shader *shader); diff -Nru mesa-18.1.3/src/intel/compiler/brw_shader.cpp mesa-18.1.5/src/intel/compiler/brw_shader.cpp --- mesa-18.1.3/src/intel/compiler/brw_shader.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/brw_shader.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -986,7 +986,8 @@ (devinfo->gen < 6 && ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP && - opcode != FS_OPCODE_CINTERP))); + opcode != FS_OPCODE_CINTERP))) || + (opcode == FS_OPCODE_LINTERP && !devinfo->has_pln); } bool diff -Nru mesa-18.1.3/src/intel/compiler/brw_vec4_cmod_propagation.cpp mesa-18.1.5/src/intel/compiler/brw_vec4_cmod_propagation.cpp --- mesa-18.1.3/src/intel/compiler/brw_vec4_cmod_propagation.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/brw_vec4_cmod_propagation.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -36,6 +36,17 @@ namespace brw { static bool +writemasks_incompatible(const vec4_instruction *earlier, + const vec4_instruction *later) +{ + return (earlier->dst.writemask != WRITEMASK_X && + earlier->dst.writemask != WRITEMASK_XYZW) || + (earlier->dst.writemask == WRITEMASK_XYZW && + later->src[0].swizzle != 
BRW_SWIZZLE_XYZW) || + (later->dst.writemask & ~earlier->dst.writemask) != 0; +} + +static bool opt_cmod_propagation_local(bblock_t *block) { bool progress = false; @@ -82,6 +93,9 @@ if (scan_inst->opcode != BRW_OPCODE_ADD) goto not_match; + if (writemasks_incompatible(scan_inst, inst)) + goto not_match; + /* A CMP is basically a subtraction. The result of the * subtraction must be the same as the result of the addition. * This means that one of the operands must be negated. So (a + @@ -132,11 +146,7 @@ scan_inst->dst, scan_inst->size_written)) { if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) || scan_inst->dst.offset != inst->src[0].offset || - (scan_inst->dst.writemask != WRITEMASK_X && - scan_inst->dst.writemask != WRITEMASK_XYZW) || - (scan_inst->dst.writemask == WRITEMASK_XYZW && - inst->src[0].swizzle != BRW_SWIZZLE_XYZW) || - (inst->dst.writemask & ~scan_inst->dst.writemask) != 0 || + writemasks_incompatible(scan_inst, inst) || scan_inst->exec_size != inst->exec_size || scan_inst->group != inst->group) { break; diff -Nru mesa-18.1.3/src/intel/compiler/brw_vec4.cpp mesa-18.1.5/src/intel/compiler/brw_vec4.cpp --- mesa-18.1.3/src/intel/compiler/brw_vec4.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/brw_vec4.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -799,10 +799,19 @@ break; if (inst->saturate) { - if (inst->dst.type != inst->src[0].type) + /* Full mixed-type saturates don't happen. However, we can end up + * with things like: + * + * mov.sat(8) g21<1>DF -1F + * + * Other mixed-size-but-same-base-type cases may also be possible. 
+ */ + if (inst->dst.type != inst->src[0].type && + inst->dst.type != BRW_REGISTER_TYPE_DF && + inst->src[0].type != BRW_REGISTER_TYPE_F) assert(!"unimplemented: saturate mixed types"); - if (brw_saturate_immediate(inst->dst.type, + if (brw_saturate_immediate(inst->src[0].type, &inst->src[0].as_brw_reg())) { inst->saturate = false; progress = true; diff -Nru mesa-18.1.3/src/intel/compiler/brw_vec4_nir.cpp mesa-18.1.5/src/intel/compiler/brw_vec4_nir.cpp --- mesa-18.1.3/src/intel/compiler/brw_vec4_nir.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/brw_vec4_nir.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -1806,7 +1806,23 @@ unreachable("not reached: should have been lowered"); case nir_op_fsign: - if (type_sz(op[0].type) < 8) { + if (op[0].abs) { + /* Straightforward since the source can be assumed to be either + * strictly >= 0 or strictly <= 0 depending on the setting of the + * negate flag. + */ + inst = emit(MOV(dst, op[0])); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = (op[0].negate) + ? emit(MOV(dst, brw_imm_f(-1.0f))) + : emit(MOV(dst, brw_imm_f(1.0f))); + inst->predicate = BRW_PREDICATE_NORMAL; + + if (instr->dest.saturate) + inst->saturate = true; + + } else if (type_sz(op[0].type) < 8) { /* AND(val, 0x80000000) gives the sign bit. 
* * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not diff -Nru mesa-18.1.3/src/intel/compiler/test_vec4_cmod_propagation.cpp mesa-18.1.5/src/intel/compiler/test_vec4_cmod_propagation.cpp --- mesa-18.1.3/src/intel/compiler/test_vec4_cmod_propagation.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/compiler/test_vec4_cmod_propagation.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -821,3 +821,75 @@ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod); } + +TEST_F(cmod_propagation_test, add_cmp_same_dst_writemask) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + dst_reg dest_null = bld.null_reg_f(); + + bld.ADD(dest, src0, src1); + vec4_instruction *inst = bld.CMP(dest_null, src0, src1, BRW_CONDITIONAL_GE); + inst->src[1].negate = true; + + /* = Before = + * + * 0: add dest.xyzw src0 src1 + * 1: cmp.ge.f0 null.xyzw src0 -src1 + * + * = After = + * 0: add.ge.f0 dest.xyzw src0 src1 + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, add_cmp_different_dst_writemask) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + dst_reg dest_null = bld.null_reg_f(); + + bld.ADD(dest, src0, src1); + vec4_instruction *inst = bld.CMP(dest_null, src0, src1, BRW_CONDITIONAL_GE); + inst->src[1].negate = true; + + 
/* = Before = + * + * 0: add dest.x src0 src1 + * 1: cmp.ge.f0 null.xyzw src0 -src1 + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); +} diff -Nru mesa-18.1.3/src/intel/Makefile.in mesa-18.1.5/src/intel/Makefile.in --- mesa-18.1.3/src/intel/Makefile.in 2018-06-29 17:47:42.000000000 +0000 +++ mesa-18.1.5/src/intel/Makefile.in 2018-07-27 13:52:30.000000000 +0000 @@ -305,7 +305,10 @@ @HAVE_INTEL_VULKAN_TRUE@am__append_7 = vulkan/libvulkan-test.la @HAVE_INTEL_VULKAN_TRUE@am__append_8 = $(VULKAN_PER_GEN_LIBS) \ @HAVE_INTEL_VULKAN_TRUE@ vulkan/libvulkan_common.la -@HAVE_INTEL_VULKAN_TRUE@@HAVE_PLATFORM_ANDROID_TRUE@am__append_9 = $(ANDROID_CPPFLAGS) +@HAVE_INTEL_VULKAN_TRUE@@HAVE_PLATFORM_ANDROID_TRUE@am__append_9 = \ +@HAVE_INTEL_VULKAN_TRUE@@HAVE_PLATFORM_ANDROID_TRUE@ $(ANDROID_CPPFLAGS) \ +@HAVE_INTEL_VULKAN_TRUE@@HAVE_PLATFORM_ANDROID_TRUE@ -DVK_USE_PLATFORM_ANDROID_KHR + @HAVE_INTEL_VULKAN_TRUE@@HAVE_PLATFORM_ANDROID_TRUE@am__append_10 = $(ANDROID_CFLAGS) @HAVE_INTEL_VULKAN_TRUE@@HAVE_PLATFORM_ANDROID_TRUE@am__append_11 = $(ANDROID_LIBS) @HAVE_INTEL_VULKAN_TRUE@@HAVE_PLATFORM_ANDROID_TRUE@am__append_12 = $(VULKAN_ANDROID_FILES) diff -Nru mesa-18.1.3/src/intel/Makefile.vulkan.am mesa-18.1.5/src/intel/Makefile.vulkan.am --- mesa-18.1.3/src/intel/Makefile.vulkan.am 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/Makefile.vulkan.am 2018-07-27 13:52:17.000000000 +0000 @@ -164,7 +164,10 @@ -lm if HAVE_PLATFORM_ANDROID -VULKAN_CPPFLAGS += $(ANDROID_CPPFLAGS) 
+VULKAN_CPPFLAGS += \ + $(ANDROID_CPPFLAGS) \ + -DVK_USE_PLATFORM_ANDROID_KHR + VULKAN_CFLAGS += $(ANDROID_CFLAGS) VULKAN_LIB_DEPS += $(ANDROID_LIBS) VULKAN_SOURCES += $(VULKAN_ANDROID_FILES) diff -Nru mesa-18.1.3/src/intel/vulkan/anv_android.c mesa-18.1.5/src/intel/vulkan/anv_android.c --- mesa-18.1.3/src/intel/vulkan/anv_android.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/vulkan/anv_android.c 2018-07-27 13:52:17.000000000 +0000 @@ -174,7 +174,7 @@ goto fail_create; if (bo->size < image->size) { - result = vk_errorf(device, device->instance, + result = vk_errorf(device->instance, device, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, "dma-buf from VkNativeBufferANDROID is too small for " "VkImage: %"PRIu64"B < %"PRIu64"B", diff -Nru mesa-18.1.3/src/intel/vulkan/anv_cmd_buffer.c mesa-18.1.5/src/intel/vulkan/anv_cmd_buffer.c --- mesa-18.1.3/src/intel/vulkan/anv_cmd_buffer.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/vulkan/anv_cmd_buffer.c 2018-07-27 13:52:17.000000000 +0000 @@ -153,6 +153,20 @@ anv_cmd_state_init(cmd_buffer); } +/** + * This function updates the size of the push constant buffer we need to emit. + * This is called in various parts of the driver to ensure that different + * pieces of push constant data get emitted as needed. However, it is important + * that we never shrink the size of the buffer. For example, a compute shader + * dispatch will always call this for the base group id, which has an + * offset in the push constant buffer that is smaller than the offset for + * storage image data. If the compute shader has storage images, we will call + * this again with a larger size during binding table emission. 
However, + * if we dispatch the compute shader again without dirtying our descriptors, + * we would still call this function with a smaller size for the base group + * id, and not for the images, which would incorrectly shrink the size of the + * push constant data we emit with that dispatch, making us drop the image data. + */ VkResult anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage, uint32_t size) @@ -166,6 +180,7 @@ anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + (*ptr)->size = size; } else if ((*ptr)->size < size) { *ptr = vk_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -173,8 +188,8 @@ anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + (*ptr)->size = size; } - (*ptr)->size = size; return VK_SUCCESS; } @@ -598,7 +613,7 @@ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); - assert(firstSet + descriptorSetCount < MAX_SETS); + assert(firstSet + descriptorSetCount <= MAX_SETS); for (uint32_t i = 0; i < descriptorSetCount; i++) { ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); diff -Nru mesa-18.1.3/src/intel/vulkan/anv_descriptor_set.c mesa-18.1.5/src/intel/vulkan/anv_descriptor_set.c --- mesa-18.1.3/src/intel/vulkan/anv_descriptor_set.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/vulkan/anv_descriptor_set.c 2018-07-27 13:52:17.000000000 +0000 @@ -257,13 +257,48 @@ anv_descriptor_set_layout_unref(device, set_layout); } +#define SHA1_UPDATE_VALUE(ctx, x) _mesa_sha1_update(ctx, &(x), sizeof(x)); + +static void +sha1_update_immutable_sampler(struct mesa_sha1 *ctx, + const struct anv_sampler *sampler) +{ + if (!sampler->conversion) + return; + + /* The only thing that affects the shader is ycbcr conversion */ + _mesa_sha1_update(ctx, 
sampler->conversion, + sizeof(*sampler->conversion)); +} + +static void +sha1_update_descriptor_set_binding_layout(struct mesa_sha1 *ctx, + const struct anv_descriptor_set_binding_layout *layout) +{ + SHA1_UPDATE_VALUE(ctx, layout->array_size); + SHA1_UPDATE_VALUE(ctx, layout->descriptor_index); + SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_index); + SHA1_UPDATE_VALUE(ctx, layout->buffer_index); + _mesa_sha1_update(ctx, layout->stage, sizeof(layout->stage)); + + if (layout->immutable_samplers) { + for (uint16_t i = 0; i < layout->array_size; i++) + sha1_update_immutable_sampler(ctx, layout->immutable_samplers[i]); + } +} + static void sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx, const struct anv_descriptor_set_layout *layout) { - size_t size = sizeof(*layout) + - sizeof(layout->binding[0]) * layout->binding_count; - _mesa_sha1_update(ctx, layout, size); + SHA1_UPDATE_VALUE(ctx, layout->binding_count); + SHA1_UPDATE_VALUE(ctx, layout->size); + SHA1_UPDATE_VALUE(ctx, layout->shader_stages); + SHA1_UPDATE_VALUE(ctx, layout->buffer_count); + SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_count); + + for (uint16_t i = 0; i < layout->binding_count; i++) + sha1_update_descriptor_set_binding_layout(ctx, &layout->binding[i]); } /* diff -Nru mesa-18.1.3/src/intel/vulkan/anv_pipeline.c mesa-18.1.5/src/intel/vulkan/anv_pipeline.c --- mesa-18.1.3/src/intel/vulkan/anv_pipeline.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/vulkan/anv_pipeline.c 2018-07-27 13:52:17.000000000 +0000 @@ -452,7 +452,7 @@ anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data, map); if (stage != MESA_SHADER_COMPUTE) - brw_nir_analyze_ubo_ranges(compiler, nir, prog_data->ubo_ranges); + brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); assert(nir->num_uniforms == prog_data->nr_params * 4); diff -Nru mesa-18.1.3/src/intel/vulkan/anv_private.h mesa-18.1.5/src/intel/vulkan/anv_private.h --- mesa-18.1.3/src/intel/vulkan/anv_private.h 2018-06-29 
17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/vulkan/anv_private.h 2018-07-27 13:52:17.000000000 +0000 @@ -1571,6 +1571,9 @@ pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; break; + case VK_ACCESS_MEMORY_WRITE_BIT: + pipe_bits |= ANV_PIPE_FLUSH_BITS; + break; default: break; /* Nothing to do */ } @@ -1601,6 +1604,12 @@ case VK_ACCESS_TRANSFER_READ_BIT: pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; break; + case VK_ACCESS_MEMORY_READ_BIT: + pipe_bits |= ANV_PIPE_INVALIDATE_BITS; + break; + case VK_ACCESS_MEMORY_WRITE_BIT: + pipe_bits |= ANV_PIPE_FLUSH_BITS; + break; default: break; /* Nothing to do */ } diff -Nru mesa-18.1.3/src/intel/vulkan/genX_cmd_buffer.c mesa-18.1.5/src/intel/vulkan/genX_cmd_buffer.c --- mesa-18.1.3/src/intel/vulkan/genX_cmd_buffer.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/vulkan/genX_cmd_buffer.c 2018-07-27 13:52:17.000000000 +0000 @@ -67,6 +67,11 @@ { struct anv_device *device = cmd_buffer->device; + /* If we are emitting a new state base address we probably need to re-emit + * binding tables. + */ + cmd_buffer->state.descriptors_dirty |= ~0; + /* Emit a render target cache flush. * * This isn't documented anywhere in the PRM. However, it seems to be @@ -3021,6 +3026,8 @@ curbe.CURBEDataStartAddress = push_state.offset; } } + + cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; } cmd_buffer->state.compute.pipeline_dirty = false; diff -Nru mesa-18.1.3/src/intel/vulkan/genX_pipeline.c mesa-18.1.5/src/intel/vulkan/genX_pipeline.c --- mesa-18.1.3/src/intel/vulkan/genX_pipeline.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/intel/vulkan/genX_pipeline.c 2018-07-27 13:52:17.000000000 +0000 @@ -1393,6 +1393,28 @@ wm.EarlyDepthStencilControl = EDSC_NORMAL; } +#if GEN_GEN >= 8 + /* Gen8 hardware tries to compute ThreadDispatchEnable for us but + * doesn't take into account KillPixels when no depth or stencil + * writes are enabled. 
In order for occlusion queries to work + * correctly with no attachments, we need to force-enable PS thread + * dispatch. + * + * The BDW docs are pretty clear that that this bit isn't validated + * and probably shouldn't be used in production: + * + * "This must always be set to Normal. This field should not be + * tested for functional validation." + * + * Unfortunately, however, the other mechanism we have for doing this + * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW. + * Given two bad options, we choose the one which works. + */ + if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) && + !has_color_buffer_write_enabled(pipeline, blend)) + wm.ForceThreadDispatchEnable = ForceON; +#endif + wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes; @@ -1568,37 +1590,6 @@ ps.PixelShaderKillsPixel = subpass->has_ds_self_dep || wm_prog_data->uses_kill; - /* The stricter cross-primitive coherency guarantees that the hardware - * gives us with the "Accesses UAV" bit set for at least one shader stage - * and the "UAV coherency required" bit set on the 3DPRIMITIVE command are - * redundant within the current image, atomic counter and SSBO GL APIs, - * which all have very loose ordering and coherency requirements and - * generally rely on the application to insert explicit barriers when a - * shader invocation is expected to see the memory writes performed by the - * invocations of some previous primitive. Regardless of the value of - * "UAV coherency required", the "Accesses UAV" bits will implicitly cause - * an in most cases useless DC flush when the lowermost stage with the bit - * set finishes execution. 
- * - * It would be nice to disable it, but in some cases we can't because on - * Gen8+ it also has an influence on rasterization via the PS UAV-only - * signal (which could be set independently from the coherency mechanism - * in the 3DSTATE_WM command on Gen7), and because in some cases it will - * determine whether the hardware skips execution of the fragment shader - * or not via the ThreadDispatchEnable signal. However if we know that - * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and - * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any - * difference so we may just disable it here. - * - * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't - * take into account KillPixels when no depth or stencil writes are - * enabled. In order for occlusion queries to work correctly with no - * attachments, we need to force-enable here. - */ - if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) && - !has_color_buffer_write_enabled(pipeline, blend)) - ps.PixelShaderHasUAV = true; - #if GEN_GEN >= 9 ps.PixelShaderPullsBary = wm_prog_data->pulls_bary; ps.InputCoverageMaskState = wm_prog_data->uses_sample_mask ? 
diff -Nru mesa-18.1.3/src/mesa/drivers/dri/i965/brw_gs.c mesa-18.1.5/src/mesa/drivers/dri/i965/brw_gs.c --- mesa-18.1.3/src/mesa/drivers/dri/i965/brw_gs.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/mesa/drivers/dri/i965/brw_gs.c 2018-07-27 13:52:17.000000000 +0000 @@ -94,7 +94,7 @@ brw_nir_setup_glsl_uniforms(mem_ctx, gp->program.nir, &gp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_GEOMETRY]); - brw_nir_analyze_ubo_ranges(compiler, gp->program.nir, + brw_nir_analyze_ubo_ranges(compiler, gp->program.nir, NULL, prog_data.base.base.ubo_ranges); uint64_t outputs_written = gp->program.nir->info.outputs_written; diff -Nru mesa-18.1.3/src/mesa/drivers/dri/i965/brw_link.cpp mesa-18.1.5/src/mesa/drivers/dri/i965/brw_link.cpp --- mesa-18.1.3/src/mesa/drivers/dri/i965/brw_link.cpp 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/mesa/drivers/dri/i965/brw_link.cpp 2018-07-27 13:52:17.000000000 +0000 @@ -304,6 +304,8 @@ NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo, prog->nir->info.num_abos); + nir_sweep(prog->nir); + infos[stage] = &prog->nir->info; update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]); diff -Nru mesa-18.1.3/src/mesa/drivers/dri/i965/brw_tcs.c mesa-18.1.5/src/mesa/drivers/dri/i965/brw_tcs.c --- mesa-18.1.3/src/mesa/drivers/dri/i965/brw_tcs.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/mesa/drivers/dri/i965/brw_tcs.c 2018-07-27 13:52:17.000000000 +0000 @@ -185,7 +185,7 @@ brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_CTRL]); - brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir, + brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir, NULL, prog_data.base.base.ubo_ranges); } else { /* Upload the Patch URB Header as the first two uniforms. 
diff -Nru mesa-18.1.3/src/mesa/drivers/dri/i965/brw_tes.c mesa-18.1.5/src/mesa/drivers/dri/i965/brw_tes.c --- mesa-18.1.3/src/mesa/drivers/dri/i965/brw_tes.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/mesa/drivers/dri/i965/brw_tes.c 2018-07-27 13:52:17.000000000 +0000 @@ -85,7 +85,7 @@ brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); - brw_nir_analyze_ubo_ranges(compiler, tep->program.nir, + brw_nir_analyze_ubo_ranges(compiler, tep->program.nir, NULL, prog_data.base.base.ubo_ranges); int st_index = -1; diff -Nru mesa-18.1.3/src/mesa/drivers/dri/i965/brw_vs.c mesa-18.1.5/src/mesa/drivers/dri/i965/brw_vs.c --- mesa-18.1.3/src/mesa/drivers/dri/i965/brw_vs.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/mesa/drivers/dri/i965/brw_vs.c 2018-07-27 13:52:17.000000000 +0000 @@ -181,7 +181,7 @@ brw_nir_setup_glsl_uniforms(mem_ctx, vp->program.nir, &vp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_VERTEX]); - brw_nir_analyze_ubo_ranges(compiler, vp->program.nir, + brw_nir_analyze_ubo_ranges(compiler, vp->program.nir, key, prog_data.base.base.ubo_ranges); } else { brw_nir_setup_arb_uniforms(mem_ctx, vp->program.nir, &vp->program, diff -Nru mesa-18.1.3/src/mesa/drivers/dri/i965/brw_wm.c mesa-18.1.5/src/mesa/drivers/dri/i965/brw_wm.c --- mesa-18.1.3/src/mesa/drivers/dri/i965/brw_wm.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/mesa/drivers/dri/i965/brw_wm.c 2018-07-27 13:52:17.000000000 +0000 @@ -149,7 +149,7 @@ brw_nir_setup_glsl_uniforms(mem_ctx, fp->program.nir, &fp->program, &prog_data.base, true); brw_nir_analyze_ubo_ranges(brw->screen->compiler, fp->program.nir, - prog_data.base.ubo_ranges); + NULL, prog_data.base.ubo_ranges); } else { brw_nir_setup_arb_uniforms(mem_ctx, fp->program.nir, &fp->program, &prog_data.base); diff -Nru mesa-18.1.3/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
mesa-18.1.5/src/mesa/drivers/dri/i965/brw_wm_surface_state.c --- mesa-18.1.3/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 2018-07-27 13:52:17.000000000 +0000 @@ -224,7 +224,7 @@ if (use_clear_address) { /* Make sure the offset is aligned with a cacheline. */ assert((clear_offset & 0x3f) == 0); - uint32_t *clear_address = + uint64_t *clear_address = state + brw->isl_dev.ss.clear_color_state_offset; *clear_address = brw_state_reloc(&brw->batch, *surf_offset + diff -Nru mesa-18.1.3/src/mesa/state_tracker/st_cb_eglimage.c mesa-18.1.5/src/mesa/state_tracker/st_cb_eglimage.c --- mesa-18.1.3/src/mesa/state_tracker/st_cb_eglimage.c 2018-06-29 17:47:30.000000000 +0000 +++ mesa-18.1.5/src/mesa/state_tracker/st_cb_eglimage.c 2018-07-27 13:52:17.000000000 +0000 @@ -229,6 +229,8 @@ pipe_resource_reference(&stObj->pt, stimg->texture); st_texture_release_all_sampler_views(st, stObj); pipe_resource_reference(&stImage->pt, stObj->pt); + if (st->pipe->screen->resource_changed) + st->pipe->screen->resource_changed(st->pipe->screen, stImage->pt); stObj->surface_format = stimg->format; stObj->level_override = stimg->level; diff -Nru mesa-18.1.3/src/util/disk_cache.h mesa-18.1.5/src/util/disk_cache.h --- mesa-18.1.3/src/util/disk_cache.h 2018-06-29 17:47:30.000000000 +0000 +++ mesa-18.1.5/src/util/disk_cache.h 2018-07-27 13:52:17.000000000 +0000 @@ -24,7 +24,7 @@ #ifndef DISK_CACHE_H #define DISK_CACHE_H -#ifdef ENABLE_SHADER_CACHE +#ifdef HAVE_DLFCN_H #include #endif #include @@ -88,10 +88,10 @@ return buf; } +#ifdef HAVE_DLFCN_H static inline bool disk_cache_get_function_timestamp(void *ptr, uint32_t* timestamp) { -#ifdef ENABLE_SHADER_CACHE Dl_info info; struct stat st; if (!dladdr(ptr, &info) || !info.dli_fname) { @@ -102,10 +102,8 @@ } *timestamp = st.st_mtime; return true; -#else - return false; -#endif } +#endif /* Provide inlined stub functions if the shader cache is 
disabled. */ diff -Nru mesa-18.1.3/VERSION mesa-18.1.5/VERSION --- mesa-18.1.3/VERSION 2018-06-29 17:47:29.000000000 +0000 +++ mesa-18.1.5/VERSION 2018-07-27 13:52:17.000000000 +0000 @@ -1 +1 @@ -18.1.3 +18.1.5