diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/ChangeLog mesa-17.3.0~git20171212+17.3.49a612d1/ChangeLog --- mesa-17.3.0~git20171121+17.3.d1e6cf46/ChangeLog 2017-11-21 10:20:13.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/ChangeLog 2017-12-12 07:38:05.000000000 +0000 @@ -1,9 +1,739 @@ -commit 0a31c31663780dcab730c66d93dc746e5647b422 +commit 0200a0d5936d7fda2361712999b0899080fe2d60 Author: Rico Tzschichholz -Date: Tue Nov 21 11:19:33 2017 +0100 +Date: Tue Dec 12 08:37:18 2017 +0100 Add debian tree from origin/ubuntu +commit 49a612d1580b3316392273a069d20d93967126a8 +Author: Emil Velikov +Date: Fri Dec 8 13:53:30 2017 +0000 + + docs: add sha256 checksums for 17.3.0 + + Signed-off-by: Emil Velikov + +commit 8d55da9f579463038f4305ed7d505aa7fffa0f37 +Author: Emil Velikov +Date: Fri Dec 8 13:47:33 2017 +0000 + + docs: Update 17.3.0 release notes + + Signed-off-by: Emil Velikov + +commit c4b070d25c023abcb7b07e4d3b0db5c48f756f01 +Author: Emil Velikov +Date: Fri Dec 8 13:30:44 2017 +0000 + + Update version to 17.3.0(final) + + Signed-off-by: Emil Velikov + +commit 30abe7dfaeba73406d63ef54f36969d0e75f83af +Author: Emil Velikov +Date: Mon Dec 4 08:50:46 2017 +0000 + + Update version to 17.3.0-rc6 + + Signed-off-by: Emil Velikov + +commit 5ac9d91ee3d897016d54e970a977c3fbbbe2488e +Author: Jason Ekstrand +Date: Wed Nov 29 16:22:42 2017 -0800 + + i965: Disable regular fast-clears (CCS_D) on gen9+ + + This partially reverts commit 3e57e9494c2279580ad6a83ab8c065d01e7e634e + which caused a bunch of GPU hangs on several Source titles. To date, we + have no clue why these hangs are actually happening. This undoes the + final effect of 3e57e9494c227 and gets us back to not hanging. Tested + with Team Fortress 2. 
+ + Reviewed-by: Kenneth Graunke + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102435 + Fixes: 3e57e9494c2279580ad6a83ab8c065d01e7e634e + Cc: mesa-stable@lists.freedesktop.org + (cherry picked from commit ee57b15ec764736e2d5360beaef9fb2045ed0f68) + +commit 4eae5b39eee45ee9ec58634764a9d2376872d5c8 +Author: Vinson Lee +Date: Tue Nov 28 23:16:58 2017 -0800 + + anv: Check if memfd_create is already defined. + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103909 + Signed-off-by: Vinson Lee + Reviewed-by: Eric Engestrom + (cherry picked from commit 8c1e4b1afc8d396ccf99c725c59b29a9aa305557) + [Emil Velikov: drop NA hunks] + Signed-off-by: Emil Velikov + + Conflicts: + configure.ac + meson.build + src/intel/vulkan/anv_allocator.c + +commit 4e84aaa906e7abf41b170146dd7ef3e7a76820b9 +Author: Vadym Shovkoplias +Date: Mon Nov 27 12:15:13 2017 +0200 + + intel/blorp: Fix possible NULL pointer dereferencing + + Fix incomplete check of input params in blorp_surf_convert_to_uncompressed() + which can lead to NULL pointer dereferencing. + + Fixes: 5ae8043fed2 ("intel/blorp: Add an entrypoint for doing + bit-for-bit copies") + Fixes: f395d0abc83 ("intel/blorp: Internally expose + surf_convert_to_uncompressed") + Reviewed-by: Emil Velikov + Reviewed-by: Andres Gomez + + (cherry picked from commit cdb3eb7174f84f3200408c4b43c819fb093da9c6) + +commit bcd4f26b41c97991d1268c308c15c350bd1da318 +Author: Kenneth Graunke +Date: Tue Nov 28 08:58:21 2017 -0800 + + i965: Reorganize batch/state BO fields into a 'brw_growing_bo' struct. + + We're about to add more of them, and need to pass the whole lot of them + around together when growing them. Putting them in a struct makes this + much easier. + + brw->batch.batch.bo is a bit of a mouthful, but it's nice to have things + labeled 'batch' and 'state' now that we have multiple buffers. + + Fixes: 2dfc119f22f257082ab0 "i965: Grow the batch/state buffers if we need space and can't flush." 
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103101 + Reviewed-by: Ian Romanick + (cherry picked from commit 74e38739ca266b8178eaa70e30578aa929b067ab) + [Emil Velikov: remove NA blorp_get_surface_base_address hunk] + Signed-off-by: Emil Velikov + + Conflicts: + src/mesa/drivers/dri/i965/genX_blorp_exec.c + +commit 447afbf31b1bdbfc8cb03cfddeef0373cd291cf4 +Author: Kenneth Graunke +Date: Tue Nov 28 08:20:39 2017 -0800 + + i965: Don't grow batch/state buffer on every emit after an overflow. + + Once we reach the intended size of the buffer (BATCH_SZ or STATE_SZ), we + try and flush. If we're not allowed to flush, we resort to growing the + buffer so that there's space for the data we need to emit. + + We accidentally got the threshold wrong. The first non-wrappable call + beyond (e.g.) STATE_SZ would grow the buffer to floor(1.5 * STATE_SZ), + The next call would see we were beyond STATE_SZ and think we needed to + grow a second time - when the buffer was already large enough. + + We still want to flush when we hit STATE_SZ, but for growing, we should + use the actual size of the buffer as the threshold. This way, we only + grow when actually necessary. + + v2: Simplify the control flow (suggested by Jordan) + + Fixes: 2dfc119f22f257082ab0 "i965: Grow the batch/state buffers if we need space and can't flush." + Reviewed-by: Jordan Justen + (cherry picked from commit ca4361658635f2b401e9793c0b982721998ecb70) + +commit 09f6bd5ef27c1b16b1468441b070b60c2d57523d +Author: Kenneth Graunke +Date: Tue Nov 28 08:59:07 2017 -0800 + + i965: Preserve EXEC_OBJECT_CAPTURE when growing the BO. + + The original state buffer was marked with EXEC_OBJECT_CAPTURE. When + growing it, we want to preserve that flag so we continue to capture it + in GPU hang reports. + + Fixes: 2dfc119f22f257082ab0 "i965: Grow the batch/state buffers if we need space and can't flush." 
+ Reviewed-by: Ian Romanick + (cherry picked from commit 52d32917e1f3f70abcbcff5508f7423e94626b41) + +commit a49b70d2ec36f6a420801c41704d9b4d58b57c17 +Author: Kenneth Graunke +Date: Tue Nov 28 08:30:50 2017 -0800 + + i965: Use old_bo->align when growing batch/state buffer instead of 4096. + + The intention here is make the new BO use the same alignment as the old + BO. This isn't strictly necessary, but we would have to update the + 'alignment' field in the validation list when swapping it out, and we + don't bother today. + + The batch and state buffers use an alignment of 4096, so this should be + equivalent - it's just clearer than cut and pasting a magic constant. + + Fixes: 2dfc119f22f257082ab0 "i965: Grow the batch/state buffers if we need space and can't flush." + Reviewed-by: Ian Romanick + Reviewed-by: Jordan Justen + (cherry picked from commit 2af70854609509adf5dc92af2fcf1c30938e2a5d) + +commit f1050f0435aff6956dacf7ea9373b841984c6baf +Author: Kenneth Graunke +Date: Wed Nov 29 00:27:18 2017 -0800 + + i965: Program the dynamic state heap size to MAX_STATE_SIZE. + + STATE_BASE_ADDRESS specifies a maximum size of the dynamic state + section, beyond which data supposedly reads back as 0. On Gen8+, + we were programming it to the size of the buffer. This worked fine + until we started growing the state buffer in commit 2dfc119f22f25708. + When the state buffer grows, the value in STATE_BASE_ADDRESS becomes + too small, and our state beyond STATE_SZ bytes would read back as 0. + + To avoid having to update the value, we program it to MAX_STATE_SIZE. + We used to program the upper bound to the maximum on older hardware + anyway, so programming it too large isn't a big deal. + + Bogus SURFACE_STATE can easily lead to GPU hangs and misrendering. + DiRT Rally was hitting the statebuffer growth path, and suffered from + bad texture corruption and GPU hangs (usually around the same time). + + This patch fixes both issues. 
+ + Fixes: 2dfc119f22f257082ab0 "i965: Grow the batch/state buffers if we need space and can't flush." + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103101 + Tested-by: Jordan Justen + Reviewed-by: Chris Wilson + Reviewed-by: Jordan Justen + Reviewed-by: Jason Ekstrand + (cherry picked from commit cfc5af588cf8e0cfb41ea907a7da3cca676be1c2) + +commit 14e528b2db7338099e7c8d4f9921228ce949fb05 +Author: Marek Olšák +Date: Thu Nov 30 02:16:29 2017 +0100 + + radeonsi/gfx9: fix importing shared textures with DCC + + VI has 11 dwords at least. GFX9 has 10 dwords. + + Cc: 17.2 17.3 + Reviewed-by: Nicolai Hähnle + (cherry picked from commit ed4780383cae61e051b3d3d120649222da49feae) + [Emil Velikov: s|radeon/r600_texture.c|radeonsi/si_state.c|] + Signed-off-by: Emil Velikov + + Conflicts: + src/gallium/drivers/radeon/r600_texture.c + +commit c846d72523215261a0bd2dfbbd8875c7809b93bc +Author: Frank Richter +Date: Tue Oct 17 10:34:27 2017 +0200 + + gallium/wgl: fix default pixel format issue + + When creating a context without SetPixelFormat() don't blindly take the + pixel format reported by GDI. Instead, look for our own closest pixel + format. + + Minor clean-ups added by Brian Paul. + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103412 + Reviewed-by: Brian Paul + Tested-by: Brian Paul + (cherry picked from commit bf41b2b2627aa3790d380092c28c5d3395cc9cde) + +commit 56993f4b8a40e4fa55e97788091aaf09a8d22482 +Author: Roland Scheidegger +Date: Wed Nov 22 03:11:33 2017 +0100 + + r600: set DX10_CLAMP for compute shader too + + I really intended to set this for all shader stages by + 3835009796166968750ff46cf209f6d4208cda86 but missed it for compute shaders + (because it's in a different source file...). 
+ + Reviewed-by: Dave Airlie + (cherry picked from commit 71e630753ebbee82e8f8709da5488296b2c070c8) + +commit 9b2c27a39e096a49e9a1ea4aeb70b688a239ef23 +Author: Roland Scheidegger +Date: Thu Nov 9 19:41:29 2017 +0100 + + r600: use DX10_CLAMP bit in shader setup + + The docs are not very concise in what this really does, however both + Alex Deucher and Nicolai Hähnle suggested this only really affects instructions + using the CLAMP output modifier, and I've confirmed that with the newly + changed piglit isinf_and_isnan test. + So, with this bit set, if an instruction has the CLAMP modifier bit (which + clamps to [0,1]) set, then NaNs will be converted to zero, otherwise the result + will be NaN. + D3D10 would require this, glsl doesn't have modifiers (with mesa + clamp(x,0,1) would get converted to such a modifier) coupled with a + whatever-floats-your-boat specified NaN behavior, but the clamp behavior + should probably always be used (this also matches what a decomposition into + min(1.0, max(x, 0.0)) would do, if min/max also adhere to the ieee spec of + picking the non-nan result). + Some apps may in fact rely on this, as this prevents misrenderings in + This War of Mine since using ieee muls + (ce7a045feeef8cad155f1c9aa07f166e146e3d00), without having to use clamped + rcp opcode, which would also fix this bug there. + radeonsi also seems to set this bit nowadays if I see that righ (albeit the + llvm amdgpu code comment now says "Make clamp modifier on NaN input returns 0" + instead of "Do not clamp NAN to 0" since it was changed, which also looks + a bit misleading). + + v2: set it in all shader stages. 
+ + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544 + + Reviewed-by: Dave Airlie + (cherry picked from commit 3835009796166968750ff46cf209f6d4208cda86) + +commit 6954eb1a2a3fc14ced5355f7b0fcf28634e4bb34 +Author: Roland Scheidegger +Date: Thu Nov 9 19:37:54 2017 +0100 + + r600: use min_dx10/max_dx10 instead of min/max + + I believe this is the safe thing to do, especially ever since the driver + actually generates NaNs for muls too. + The ISA docs are not very helpful here, however the dx10 versions will pick + a non-nan result over a NaN one (this is also the ieee754 behavior), whereas + the non-dx10 ones will pick the NaN (verified by newly changed piglit + isinf-and-isnan test). + Other "modern" drivers will most likely do the same. + This was shown to make some difference for bug 103544, albeit it is not + required to fix it. + + Reviewed-by: Dave Airlie + (cherry picked from commit aab0bfc648bf1be50b81a25224970015f1dc78b8) + +commit b79e15b08699ecf8d2f67eecff46f1c8b7d9b577 +Author: Nicolai Hähnle +Date: Tue Aug 1 12:44:34 2017 +0200 + + glsl: fix interpolateAtXxx(some_vec[idx], ...) with dynamic idx + + The dynamic index of a vector (not array!) is lowered to a sequence of + conditional assignments. However, the interpolate_at_* expressions + require that the interpolant is an l-value of a shader input. + + So instead of doing conditional assignments of parts of the shader input + and then interpolating that (which is nonsensical), we interpolate the + entire shader input and then do conditional assignments of the interpolated + result. 
+ + Reviewed-by: Timothy Arceri + (cherry picked from commit ca63a5ed3e9efb2bd645b425f7393089f4e132a6) + +commit 77cba992c39de3f5dfb76f1b93dc0d77f3bc8a04 +Author: Nicolai Hähnle +Date: Wed Jun 14 12:43:10 2017 +0200 + + glsl: allow any l-value of an input variable as interpolant in interpolateAt* + + The intended rule has been clarified in GLSL 4.60, Section 8.13.2 + (Interpolation Functions): + + "For all of the interpolation functions, interpolant must be an l-value + from an in declaration; this can include a variable, a block or + structure member, an array element, or some combination of these. + Component selection operators (e.g., .xy) may be used when specifying + interpolant." + + For members of interface blocks, var->data.must_be_shader_input must be + determined on-the-fly after lowering interface blocks, since we don't want + to disable varying packing for an entire block just because one input in it + is used in interpolateAt*. + + v2: keep setting must_be_shader_input in ast_function (Ian) + v3: follow the relaxed rule of GLSL 4.60 + v4: only apply the relaxed rules to desktop GL + (the ES WG decided that the relaxed rules may apply in a future version + but not retroactively; see also + dEQP-GLES31.functional.shaders.multisample_interpolation.interpolate_at_centroid.negative.*) + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101378 + Reviewed-by: Ian Romanick (v1) + Reviewed-by: Timothy Arceri + (cherry picked from commit 4f42450b86ea30f9228309e02ca68755c389866f) + +commit 88fd81d3a3c74c877c20924875ac240fe74d7b73 +Author: Kenneth Graunke +Date: Sat Nov 25 16:59:27 2017 -0800 + + i965: Fix Smooth Point Enables. + + We want to program the 3DSTATE_RASTER field to the gl_context value, + not the other way around. + + Fixes: 13ac46557ab1 (i965: Port Gen8+ 3DSTATE_RASTER state to genxml.) 
+ + Reviewed-by: Jason Ekstrand + Reviewed-by: Lionel Landwerlin + (cherry picked from commit 760e0156dfd5cf0281bc964a8090c792fc44ab16) + +commit f7687449701b4e08c3a96a765f6425677a1b4c67 +Author: Nicolai Hähnle +Date: Fri Nov 17 20:01:50 2017 +0100 + + st_glsl_to_tgsi: check for the tail sentinel in merge_two_dsts + + This fixes yet another case where DFRACEXP has only one destination. Found + by address sanitizer. + + Fixes tests/spec/arb_gpu_shader_fp64/execution/built-in-functions/fs-frexp-dvec4-only-mantissa.shader_test + + Fixes: 3b666aa74795 ("st/glsl_to_tgsi: fix DFRACEXP with only one destination") + Acked-by: Marek Olšák + (cherry picked from commit 7e35bdad1c67d7df2832ac4b39bff471e83812e5) + +commit 1e908f5035a84684798f270b500561f0d9290635 +Author: Marek Olšák +Date: Tue Nov 28 20:57:10 2017 +0100 + + radeonsi: fix layered DCC fast clear + + Cc: 17.2 17.3 + Reviewed-by: Nicolai Hähnle + (cherry picked from commit 6863651bbdd7dcfad60bae78d1e17898f49ca08b) + +commit 9777d08e573d229b8b221fc5b9c7797cfb713ba0 +Author: Dave Airlie +Date: Sun Nov 26 23:36:39 2017 +0000 + + r600/sb: handle jump after target to end of program. (v2) + + This fixes hangs on cayman with + tests/spec/arb_tessellation_shader/execution/trivial-tess-gs_no-gs-inputs.shader_test + + This has a single if/else in it, and when this peephole activated, + it would set the jump target to NULL if there was no instruction + after the final POP. This adds a NOP if we get a jump in this case, + and seems to fix the hangs, so we have a valid target for the ELSE + instruction to go to, instead of 0 (which causes infinite loops). + + v2: update last_cf correctly. (I had some other patches hide this) + + Cc: + Signed-off-by: Dave Airlie + (cherry picked from commit 579ec9c311eb5176054b624f39c5c024605b58d6) + +commit aa4b1e71cbe481f198c57de4fa74f43e00a7c6c1 +Author: Ben Crocker +Date: Mon Nov 27 14:44:59 2017 -0500 + + docs/llvmpipe.html: Minor edits + + Language and spelling fixups in three places. 
+ + Cc: "17.2" "17.3" + Signed-off-by: Ben Crocker + Reviewed-by: Eric Engestrom + + [Eric: move two fixes from the other patch to this one.] + Signed-off-by: Eric Engestrom + (cherry picked from commit b43daf7bf6cb505ece025c718ac6f074c38b2d49) + +commit 3dc6072e3d6b127947dc8a72e3cca035e034c19d +Author: Kai Wasserbäch +Date: Thu Nov 16 12:58:50 2017 +0100 + + docs: Point to apt.llvm.org for development snapshot packages + + Signed-off-by: Kai Wasserbäch + Reviewed-by: Eric Engestrom + (cherry picked from commit d25123e23a77e216b45f8e1a83ac32805b07be82) + +commit a34ad6f363acbdb173f8e378b3cad557daa62378 +Author: Tapani Pälli +Date: Mon Nov 20 15:00:19 2017 +0200 + + mesa/gles: adjust internal format in glTexSubImage2D error checks + + When floating point textures are created on OpenGL ES 2.0, driver + is free to choose used internal format. Mesa makes this decision in + adjust_for_oes_float_texture. Error checking for glTexImage2D properly + checks that sized formats are not used. We use same error checking + path for glTexSubImage2D (since there is lot of overlap), however since + those checks include internalFormat checks, we need to pass original + internalFormat passed by the client. Patch adds oes_float_internal_format + that does reverse adjust_for_oes_float_texture to get that format. + + Fixes following test failure: + ES2-CTS.gtf.GL2ExtensionTests.texture_float.texture_float + + (when running test with MESA_GLES_VERSION_OVERRIDE=2.0) + + Signed-off-by: Tapani Pälli + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103227 + Cc: "17.3" + Reviewed-by: Matt Turner + (cherry picked from commit 1e508e10d9ae649bfe5ab7b1842993be50052b21) + +commit 4bbc0f366a8a3682678ba49c8f80b2fadb42715b +Author: Emil Velikov +Date: Thu Nov 23 18:51:14 2017 +0000 + + gl_table.py: add extern C guard for the generated glapitable.h + + The header can be included from C++, hence contents should have + appropriate notation. 
+ + Cc: mesa-stable@lists.freedesktop.org + Cc: Dylan Baker + Signed-off-by: Emil Velikov + Reviewed-by: Eric Engestrom + (cherry picked from commit c7616ac06973a80c3c6e9def49a3fa6606ba6097) + +commit 86b35a990195af1c5457f74ff0d90f4537d0ad33 +Author: Eduardo Lima Mitev +Date: Sun Mar 5 20:28:43 2017 +0100 + + glsl/linker: Check that re-declared, inter-shader built-in blocks match + + >From GLSL 4.5 spec, section "7.1 Built-In Language Variables", page 130 of + the PDF states: + + "If multiple shaders using members of a built-in block belonging to + the same interface are linked together in the same program, they must + all redeclare the built-in block in the same way, as described in + section 4.3.9 “Interface Blocks” for interface-block matching, or a + link-time error will result." + + Fixes: + * GL45-CTS.CommonBugs.CommonBug_PerVertexValidation + + v2 (Neil Roberts): + Explicitly look for gl_PerVertex in the symbol tables instead of + waiting to find a variable in the interface. + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102677 + Reviewed-by: Kenneth Graunke + Signed-off-by: Eduardo Lima Mitev + Signed-off-by: Neil Roberts + (cherry picked from commit f9de7f55969e981f6e98a41fce04bc3a2a8280eb) + +commit f34c7ba4e1666376ab99e62e436b95eeaf835b16 +Author: Eduardo Lima Mitev +Date: Sun Mar 5 20:28:42 2017 +0100 + + glsl: Use the utility function to copy symbols between symbol tables + + This effectively factorizes a couple of similar routines. 
+ + v2 (Neil Roberts): Non-trivial rebase on master + + Reviewed-by: Kenneth Graunke + Signed-off-by: Eduardo Lima Mitev + Signed-off-by: Neil Roberts + (cherry picked from commit f5fe99ac85e15b705612bd9e7599cc974c2a121b) + +commit ebb7ccb306e0aa8b44fa6ad047579db58e5e2cff +Author: Eduardo Lima Mitev +Date: Sun Mar 5 20:28:41 2017 +0100 + + glsl_parser_extra: Add utility to copy symbols between symbol tables + + Some symbols gathered in the symbols table during parsing are needed + later for the compile and link stages, so they are moved along the + process. Currently, only functions and non-temporary variables are + copied between symbol tables. However, the built-in gl_PerVertex + interface blocks are also needed during the linking stage (the last + step), to match re-declared blocks of inter-stage shaders. + + This patch adds a new utility function that will factorize current code + that copies functions and variables between two symbol tables, and in + addition will copy explicitly declared gl_PerVertex blocks too. + + The function will be used in a subsequent patch. + + v2 (Neil Roberts): + Allow the src symbol table to be NULL and explicitly copy the + gl_PerVertex symbols in case they are not referenced in the exec_list. + + Reviewed-by: Kenneth Graunke + Signed-off-by: Eduardo Lima Mitev + Signed-off-by: Neil Roberts + (cherry picked from commit 4c62a270a99d443316e29020377465a90a6968c0) + +commit e4d964670a6d591352445e870d24454bf67d2970 +Author: Matt Turner +Date: Thu Nov 23 10:41:34 2017 -0800 + + util: Fix disk_cache index calculation on big endian + + The cache-test test program attempts to create a collision (using key_a + and key_a_collide) by making the first two bytes identical. The idea is + fine -- the shader cache wants to use the first four characters of a + SHA1 hex digest as the index. 
+ + The following program + + unsigned char array[4] = {1, 2, 3, 4}; + int *ptr = (int *)array; + + for (int i = 0; i < 4; i++) { + printf("%02x", array[i]); + } + printf("\n"); + + printf("%08x\n", *ptr); + + prints + + 01020304 + 04030201 + + on little endian, and + + 01020304 + 01020304 + + on big endian. + + On big endian platforms reading the character array back as an int (as + is done in disk_cache.c) does not yield the same results as reading the + byte array. + + To get the first four characters of the SHA1 hex digest when we mask + with CACHE_INDEX_KEY_MASK, we need to byte swap the int on big endian + platforms. + + Bugzilla: https://bugs.freedesktop.org/103668 + Bugzilla: https://bugs.gentoo.org/637060 + Bugzilla: https://bugs.gentoo.org/636326 + Fixes: 87ab26b2ab35 ("glsl: Add initial functions to implement an + on-disk cache") + Reviewed-by: Emil Velikov + (cherry picked from commit c690a7a8cdfb6425547bbb782020098405851194) + +commit bb8431aa3e9a53bc919f1b7f75dafa888a094f4b +Author: Matt Turner +Date: Wed Nov 22 22:39:51 2017 -0800 + + util: Fix SHA1 implementation on big endian + + The code defines a macro blk0(i) based on the preprocessor condition + BYTE_ORDER == LITTLE_ENDIAN. If true, blk0(i) is defined as a byte swap + operation. Unfortunately, if the preprocessor macros used in the test + are no defined, then the comparison becomes 0 == 0 and it evaluates as + true. + + Fixes: d1efa09d342b ("util: import sha1 implementation from OpenBSD") + Reviewed-by: Emil Velikov + (cherry picked from commit 532674303a92c438cb1c48d224e9dee9dece91ec) + +commit a05879c982c322be93e3f8a3d407b5d001d16331 +Author: Matt Turner +Date: Mon Nov 20 14:24:57 2017 -0800 + + i965/fs: Handle negating immediates on MADs when propagating saturates + + MADs don't take immediate sources, but we allow them in the IR since it + simplifies a lot of things. I neglected to consider that case. 
+ + Fixes: 4009a9ead490 ("i965/fs: Allow saturate propagation to propagate + negations into MADs.") + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103616 + Reported-and-Tested-by: Ruslan Kabatsayev + Reviewed-by: Ian Romanick + (cherry picked from commit a05af1f7b8f82a38513bba31f9573cd62d82f18d) + +commit 3e639156b84d8de36d8db4dfd90f28b7125c82bb +Author: Nicolai Hähnle +Date: Fri Nov 10 13:11:53 2017 +0100 + + ddebug: fix use-after-free of streamout targets + + Fixes: b47727a83ad6 ("ddebug: implement pipelined hang detection mode") + Reviewed-by: Marek Olšák + (cherry picked from commit 16f8da299700e714fd5aff265b8f28fe2badfa95) + +commit e7904e1275e73ef71ece35c37c408817f40c57c6 +Author: Nicolai Hähnle +Date: Wed Nov 15 11:22:26 2017 +0100 + + radeonsi/gfx9: fix VM fault with fetched instance divisors + + We need to account for SGPR locations in merged shaders. + + This case is exercised by KHR-GL45.enhanced_layouts.vertex_attrib_locations + + Fixes: 79c2e7388c7f ("radeonsi/gfx9: use SPI_SHADER_USER_DATA_COMMON") + Reviewed-by: Marek Olšák + (cherry picked from commit df5ebe0c261e8d13683f2515be9ce263f5437bcd) + +commit 210bbf948ecb101e1ecf0344680d89b34712286d +Author: George Barrett +Date: Sun Nov 19 21:55:10 2017 +1100 + + glsl: Catch subscripted calls to undeclared subroutines + + generate_array_index fails to check whether the target of a subroutine + call exists in the AST, potentially passing around null ir_rvalue + pointers eventuating in abort/segfault. 
+ + Fixes: fd01840c0bd3 ("glsl: add AoA support to subroutines") + Reviewed-by: Timothy Arceri + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100438 + (cherry picked from commit f09c2cefdd53cd61562a994294e9d0630868d2da) + +commit 9ffe450dab68a601c4b9a9b290006b0de37f92ce +Author: Gert Wollny +Date: Fri Nov 17 12:13:40 2017 +0100 + + r600: Emit EOP for more CF instruction types + + So far on pre-cayman chipsets the CF instructions CF_OP_LOOP_END, + CF_OP_CALL_FS, CF_OP_POP, and CF_OP_GDS an extra CF_NOP instruction + was added to add the EOP flag, even though this is not actually + needed, because all these instrutions support the EOP flag. + + This patch removes the fixup code, adds setting the EOP flag for the + according instructions as well as others like CF_OP_TEX and CF_OP_VTX, + and adds writing out EOP for this type of instruction in the disassembler. + + This also fixes a bug where shaders were created that didn't actually have + the EOP flag set in the last CF instruction, which might have resulted + in GPU lockups. + + [airlied: cleaned up a little] + Signed-off-by: Gert Wollny + Cc: + Signed-off-by: Dave Airlie + (cherry picked from commit 1d076aafbc05b0af299826ac0ee63b2fb28e944a) + +commit 2859a8f29826cbc3d7a7b52405c3480d6ee3a307 +Author: Jason Ekstrand +Date: Fri Nov 17 16:52:09 2017 -0800 + + i965: Mark BOs as external when we export their handle + + Almost all of our BO export paths were already properly marked the BO as + external and added it to the handle table. Most export use-cases go + through a prime fd or flink where we have a brw_bo export helper that + does the right thing. The one missing one happens when you call + queryImage and ask for __DRI_IMAGE_ATTRIB_HANDLE. We just grabbed the + gem handle out of the BO (because it's really easy to do that) and + handed it off to the client; what could go wrong? 
As it turns out, this + path is used by basically every compositor that wants to turn around and + call drmModeAddFB2 on it so it can hand it off to display. The result, + as of 4b1e70cc57d7ff5f465544644b2180dee1490cee, is that we no longer set + MOCS_PTE on those surfaces and the kernel's attempts to disable caching + fail and we scanout gets corruption. + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103759 + Fixes: 4b1e70cc57d7ff5f465544644b2180dee1490cee + Reviewed-by: Kenneth Graunke + Cc: mesa-stable@lists.freedesktop.org + (cherry picked from commit 0a6a137eb27129e17298cfe9dd620205588ee4f6) + +commit 0904becf94c1758f8ddb76538c0136beac971546 +Author: Jason Ekstrand +Date: Fri Nov 17 16:49:03 2017 -0800 + + i965/bufmgr: Add a helper to mark a BO as external + + Reviewed-by: Kenneth Graunke + Cc: mesa-stable@lists.freedesktop.org + (cherry picked from commit 344252a27f8d875572bbe65641a825af8e73845d) + commit d1e6cf4639cd771c5896fb82d549cf5c5681a9f8 Author: Emil Velikov Date: Mon Nov 20 13:59:12 2017 +0000 @@ -58363,3 +59093,89 @@ Signed-off-by: Emil Velikov (cherry picked from commit 46cc7a1746e03b1672c8508af49eb60546d5b61d) + +commit 1da51ec0f7120be5411568b8e8305d47e19673b8 +Author: Samuel Pitoiset +Date: Thu May 25 19:12:12 2017 +0200 + + glsl: fix a crash in ir_print_visitor() for bindless samplers/images + + Bindless samplers/images are represented with 64-bit unsigned + integers and they can be assigned with explicit constructors. + + Signed-off-by: Samuel Pitoiset + Reviewed-by: Timothy Arceri + Reviewed-by: Ian Romanick + +commit e4e5562d8ad99ef39f430ce0546f4f8775d4824f +Author: Samuel Pitoiset +Date: Thu May 25 18:55:09 2017 +0200 + + glsl: teach opt_array_splitting about bindless images + + Memory/format layout qualifiers shouldn't be lost when arrays + of images are splitted by this pass. 
+ + Signed-off-by: Samuel Pitoiset + Reviewed-by: Timothy Arceri + +commit 678e05cc345b714919959cb2c93fb9f052315355 +Author: Samuel Pitoiset +Date: Thu May 25 18:36:35 2017 +0200 + + glsl: teach opt_structure_splitting about images in structures + + GL_ARB_bindless_texture allows images to be declared inside + structures, but when memory/format qualifiers are used, they + should be propagated when structures are splitted. + + Signed-off-by: Samuel Pitoiset + Reviewed-by: Timothy Arceri + +commit 71efec290c8b28fa25c50a923b30a60cc38b934c +Author: Samuel Pitoiset +Date: Thu May 25 18:29:50 2017 +0200 + + glsl: fix broken indentation in do_structure_splitting() + + Signed-off-by: Samuel Pitoiset + Reviewed-by: Timothy Arceri + +commit ad717102d9db2bc0a0c9d1b1fa6d991f659da04d +Author: Samuel Pitoiset +Date: Thu May 25 16:26:42 2017 +0200 + + glsl: handle format layout qualifiers for struct with array of images + + This handles a situation like: + + struct { + layout (r32f) image2D imgs[6]; + } s; + + Signed-off-by: Samuel Pitoiset + Reviewed-by: Timothy Arceri + +commit d9460ad600781b1e4420b9b754a92c0b049e4453 +Author: Samuel Pitoiset +Date: Thu May 25 16:19:58 2017 +0200 + + glsl: handle memory qualifiers for struct with array of images + + This handles a situation like: + + struct { + image2D imgs[6]; + } s; + + Signed-off-by: Samuel Pitoiset + Reviewed-by: Timothy Arceri + +commit e3054004433fa91d4ab195604f5fa28487d34096 +Author: Rhys Kidd +Date: Wed May 31 18:48:09 2017 -0400 + + nvc0: Clean up unnecessary includes from gallium/auxiliary/vl/ + + Signed-off-by: Rhys Kidd + Reviewed-by: Samuel Pitoiset diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/configure.ac mesa-17.3.0~git20171212+17.3.49a612d1/configure.ac --- mesa-17.3.0~git20171121+17.3.d1e6cf46/configure.ac 2017-11-21 10:19:31.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/configure.ac 2017-12-12 07:37:17.000000000 +0000 @@ -793,6 +793,7 @@ AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES 
-DHAVE_SYS_SYSCTL_H"]) AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"]) AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"]) +AC_CHECK_FUNC([memfd_create], [DEFINES="$DEFINES -DHAVE_MEMFD_CREATE"]) AC_MSG_CHECKING([whether strtod has locale support]) AC_LINK_IFELSE([AC_LANG_SOURCE([[ diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/debian/changelog mesa-17.3.0~git20171212+17.3.49a612d1/debian/changelog --- mesa-17.3.0~git20171121+17.3.d1e6cf46/debian/changelog 2017-12-12 07:47:33.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/debian/changelog 2017-12-12 07:47:34.000000000 +0000 @@ -1,7 +1,7 @@ -mesa (17.3.0~git20171121+17.3.d1e6cf46-0ubuntu0ricotz~16.04.1) xenial; urgency=medium +mesa (17.3.0~git20171212+17.3.49a612d1-0ubuntu0ricotz~16.04.1) xenial; urgency=medium - * Checkout from git 20171121 (17.3 branch) up to commit - d1e6cf4639cd771c5896fb82d549cf5c5681a9f8 + * Checkout from git 20171212 (17.3 branch) up to commit + 49a612d1580b3316392273a069d20d93967126a8 * Only added debian/ tree from origin/ubuntu * hook: Disable MIR support. * hook: Relax symbols check. @@ -10,7 +10,7 @@ * hook: Drop egl-platform-rs.patch (no-mir) * hook: Drop khr_platform_mir.patch (no-mir) - -- Rico Tzschichholz Tue, 21 Nov 2017 11:20:13 +0100 + -- Rico Tzschichholz Tue, 12 Dec 2017 08:38:05 +0100 mesa (17.2.4-0ubuntu2) bionic; urgency=medium diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/docs/llvmpipe.html mesa-17.3.0~git20171212+17.3.49a612d1/docs/llvmpipe.html --- mesa-17.3.0~git20171121+17.3.d1e6cf46/docs/llvmpipe.html 2017-05-24 05:45:46.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/docs/llvmpipe.html 2017-12-12 07:37:17.000000000 +0000 @@ -32,11 +32,11 @@
  • -

    An x86 or amd64 processor; 64-bit mode recommended.

    +

    For x86 or amd64 processors, 64-bit mode is recommended.

    Support for SSE2 is strongly encouraged. Support for SSE3 and SSE4.1 will yield the most efficient code. The fewer features the CPU has the more - likely is that you run into underperforming, buggy, or incomplete code. + likely it is that you will run into underperforming, buggy, or incomplete code.

    See /proc/cpuinfo to know what your CPU supports. @@ -51,6 +51,12 @@ aptitude install llvm-dev

    + If you want development snapshot builds of LLVM for Debian and derived + distributions like Ubuntu, you can use the APT repository at apt.llvm.org, which is maintained by Debian's LLVM maintainer. +

    +

    For a RPM-based distribution do:

    @@ -228,8 +234,8 @@
     

-Some of this tests can output results and benchmarks to a tab-separated-file -for posterior analysis, e.g.: +Some of these tests can output results and benchmarks to a tab-separated file +for later analysis, e.g.:

   build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
@@ -240,8 +246,8 @@
 
 
  • - When looking to this code by the first time start in lp_state_fs.c, and - then skim through the lp_bld_* functions called in there, and the comments + When looking at this code for the first time, start in lp_state_fs.c, and + then skim through the lp_bld_* functions called there, and the comments at the top of the lp_bld_*.c functions.
  • diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/docs/relnotes/17.3.0.html mesa-17.3.0~git20171212+17.3.49a612d1/docs/relnotes/17.3.0.html --- mesa-17.3.0~git20171121+17.3.d1e6cf46/docs/relnotes/17.3.0.html 2017-11-21 10:19:31.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/docs/relnotes/17.3.0.html 2017-12-12 07:37:17.000000000 +0000 @@ -14,7 +14,7 @@
    -

    Mesa 17.3.0 Release Notes / TBD

    +

    Mesa 17.3.0 Release Notes / December 8, 2017

    Mesa 17.3.0 is a new development release. @@ -33,7 +33,8 @@

    SHA256 checksums

    -TBD.
    +0cb1ffe2b4637d80f08df3bdfeb300352dcffd8ff4f6711278639b084e3f07f9  mesa-17.3.0.tar.gz
    +29a0a3a6c39990d491a1a58ed5c692e596b3bfc6c01d0b45e0b787116c50c6d9  mesa-17.3.0.tar.xz
     
    @@ -58,14 +59,187 @@

    Bug fixes

      -TBD + +
    • Bug 97532 - Regression: GLB 2.7 & Glmark-2 GLES versions segfault due to linker precision error (259fc505) on dead variable
    • + +
    • Bug 100438 - glsl/ir.cpp:1376: ir_dereference_variable::ir_dereference_variable(ir_variable*): Assertion `var != NULL' failed.
    • + +
    • Bug 100613 - Regression in Mesa 17 on s390x (zSystems)
    • + +
    • Bug 101334 - AMD SI cards: Some vulkan apps freeze the system
    • + +
    • Bug 101378 - interpolateAtSample check for input parameter is too strict
    • + +
    • Bug 101655 - Explicit sync support for android
    • + +
    • Bug 101691 - gfx corruption on windowed 3d-apps running on dGPU
    • + +
    • Bug 101709 - [llvmpipe] piglit gl-1.0-scissor-offscreen regression
    • + +
    • Bug 101766 - Assertion `!"invalid type"' failed when constant expression involves literal of different type
    • + +
    • Bug 101832 - [PATCH][regression][bisect] Xorg fails to start after f50aa21456d82c8cb6fbaa565835f1acc1720a5d
    • + +
    • Bug 101851 - [regression] libEGL_common.a undefined reference to '__gxx_personality_v0'
    • + +
    • Bug 101867 - Launch options window renders black in Feral Games in current Mesa trunk
    • + +
    • Bug 101876 - SIGSEGV when launching Steam
    • + +
    • Bug 101910 - [BYT] ES31-CTS.functional.copy_image.non_compressed.viewclass_96_bits.rgb32f_rgb32f
    • + +
    • Bug 101925 - playstore/webview crash
    • + +
    • Bug 101941 - Getting different output depending on attribute declaration order
    • + +
    • Bug 101961 - Serious Sam Fusion hangs system completely
    • + +
    • Bug 101981 - Commit ddc32537d6db69198e88ef0dfe19770bf9daa536 breaks rendering in multiple applications
    • + +
    • Bug 101982 - Weston crashes when running an OpenGL program on i965
    • + +
    • Bug 101983 - [G33] ES2-CTS.functional.shaders.struct.uniform.sampler_nested* regression
    • + +
    • Bug 101989 - ES3-CTS.functional.state_query.integers.viewport_getinteger regression
    • + +
    • Bug 102006 - gstreamer vaapih264enc segfault
    • + +
    • Bug 102014 - Mesa git build broken by commit bc7f41e11d325280db12e7b9444501357bc13922
    • + +
    • Bug 102015 - [Regression,bisected]: Segfaults with various programs
    • + +
    • Bug 102024 - FORMAT_FEATURE_SAMPLED_IMAGE_BIT not supported for D16_UNORM and D32_SFLOAT
    • + +
    • Bug 102038 - assertion failure in update_framebuffer_size
    • + +
    • Bug 102050 - commit b4f639d02a causes build breakage on Android 32bit builds
    • + +
    • Bug 102052 - No package 'expat' found
    • + +
    • Bug 102062 - Segfault at eglCreateContext in android-x86
    • + +
    • Bug 102125 - [softpipe] piglit arb_texture_view-targets regression
    • + +
    • Bug 102148 - Crash when running qopenglwidget example on mesa llvmpipe win32
    • + +
    • Bug 102177 - [SKL] ES31-CTS.core.sepshaderobjs.StateInteraction fails sporadically
    • + +
    • Bug 102201 - [regression, SI] GPU crash in Unigine Valley
    • + +
    • Bug 102241 - gallium/wgl: SwapBuffers freezing regularly with swap interval enabled
    • + +
    • Bug 102274 - assertion failure in ir_validate.cpp:240
    • + +
    • Bug 102308 - segfault in glCompressedTextureSubImage3D
    • + +
    • Bug 102358 - WarThunder freezes at start, with activated vsync (vblank_mode=2)
    • + +
    • Bug 102377 - PIPE_*_4BYTE_ALIGNED_ONLY caps crashing
    • + +
    • Bug 102429 - [regression, SI] Performance decrease in Unigine Valley & Heaven
    • + +
    • Bug 102435 - [skl,kbl] [drm] GPU HANG: ecode 9:0:0x86df7cf9, in csgo_linux64 [4947], reason: Hang on rcs, action: reset
    • + +
    • Bug 102454 - glibc 2.26 doesn't provide anymore xlocale.h
    • + +
    • Bug 102461 - [llvmpipe] piglit glean fragprog1 XPD test 1 regression
    • + +
    • Bug 102467 - src/mesa/state_tracker/st_cb_readpixels.c:178]: (warning) Redundant assignment
    • + +
    • Bug 102496 - Frontbuffer rendering corruption on mesa master
    • + +
    • Bug 102502 - [bisected] Kodi crashes since commit 707d2e8b - gallium: fold u_trim_pipe_prim call from st/mesa to drivers
    • + +
    • Bug 102530 - [bisected] Kodi crashes when launching a stream - commit bd2662bf
    • + +
    • Bug 102552 - Null dereference due to not checking return value of util_format_description
    • + +
    • Bug 102565 - u_debug_stack.c:114: undefined reference to `_Ux86_64_getcontext'
    • + +
    • Bug 102573 - fails to build on armel
    • + +
    • Bug 102665 - test_glsl_to_tgsi_lifetime.cpp:53:67: error: ‘>>’ should be ‘> >’ within a nested template argument list
    • + +
    • Bug 102677 - [OpenGL CTS] KHR-GL45.CommonBugs.CommonBug_PerVertexValidation fails
    • + +
    • Bug 102680 - [OpenGL CTS] KHR-GL45.shader_ballot_tests.ShaderBallotBitmasks fails
    • + +
    • Bug 102685 - piglit.spec.glsl-1_50.compiler.vs-redeclares-pervertex-out-before-global-redeclaration
    • + +
    • Bug 102774 - [BDW] [Bisected] Absolute constant buffers break VAAPI in mpv
    • + +
    • Bug 102809 - Rust shadows(?) flash random colours
    • + +
    • Bug 102844 - memory leak with glDeleteProgram for shader program type GL_COMPUTE_SHADER
    • + +
    • Bug 102847 - swr fail to build with llvm-5.0.0
    • + +
    • Bug 102852 - Scons: Support the new Scons 3.0.0
    • + +
    • Bug 102904 - piglit and gl45 cts linker tests regressed
    • + +
    • Bug 102924 - mesa (git version) images too dark
    • + +
    • Bug 102940 - Regression: Vulkan KMS rendering crashes since 17.2
    • + +
    • Bug 102955 - HyperZ related rendering issue in ARK: Survival Evolved
    • + +
    • Bug 102999 - [BISECTED,REGRESSION] Failing Android EGL dEQP with RGBA configs
    • + +
    • Bug 103002 - string_buffer_test.cpp:43: error: ISO C++ forbids initialization of member ‘str1’
    • + +
    • Bug 103085 - [ivb byt hsw] piglit.spec.arb_indirect_parameters.tf-count-arrays
    • + +
    • Bug 103098 - [OpenGL CTS] KHR-GL45.enhanced_layouts.varying_structure_locations fails
    • + +
    • Bug 103101 - [SKL][bisected] DiRT Rally GPU hang
    • + +
    • Bug 103115 - [BSW BXT GLK] dEQP-VK.spirv_assembly.instruction.compute.sconvert.int32_to_int64
    • + +
    • Bug 103128 - [softpipe] piglit fs-ldexp regression
    • + +
    • Bug 103142 - R600g+sb: optimizer apparently stuck in an endless loop
    • + +
    • Bug 103214 - GLES CTS functional.state_query.indexed.atomic_counter regression
    • + +
    • Bug 103227 - [G965 G45 ILK] ES2-CTS.gtf.GL2ExtensionTests.texture_float.texture_float regression
    • + +
    • Bug 103247 - Performance regression: car chase, manhattan
    • + +
    • Bug 103253 - blob.h:138:1: error: unknown type name 'ssize_t'
    • + +
    • Bug 103265 - [llvmpipe] piglit depth-tex-compare regression
    • + +
    • Bug 103323 - Possible unintended error message in file pixel.c line 286
    • + +
    • Bug 103388 - Linking libcltgsi.la (llvm/codegen/libclllvm_la-common.lo) fails with "error: no match for 'operator-'" with GCC-7, Mesa from Git and current LLVM revisions
    • + +
    • Bug 103393 - glDispatchComputeGroupSizeARB : gl_GlobalInvocationID.x != gl_WorkGroupID.x * gl_LocalGroupSizeARB.x + gl_LocalInvocationID.x
    • + +
    • Bug 103412 - gallium/wgl: Another fix to context creation without prior SetPixelFormat()
    • + +
    • Bug 103519 - wayland egl apps crash on start with mesa 17.2
    • + +
    • Bug 103529 - [GM45] GPU hang with mpv fullscreen (bisected)
    • + +
    • Bug 103537 - i965: Shadow of Mordor broken since commit 379b24a40d3d34ffdaaeb1b328f50e28ecb01468 on Haswell
    • + +
    • Bug 103544 - Graphical glitches r600 in game this war of mine linux native
    • + +
    • Bug 103616 - Increased difference from reference image in shaders
    • + +
    • Bug 103628 - [BXT, GLK, BSW] KHR-GL46.shader_ballot_tests.ShaderBallotBitmasks
    • + +
    • Bug 103759 - plasma desktop corrupted rendering
    • + +
    • Bug 103787 - [BDW,BSW] gpu hang on spec.arb_pipeline_statistics_query.arb_pipeline_statistics_query-comp
    • + +
    • Bug 103909 - anv_allocator.c:113:1: error: static declaration of ‘memfd_create’ follows non-static declaration
    • +

    Changes

    -
      -TBD -
    diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/.lastcommit mesa-17.3.0~git20171212+17.3.49a612d1/.lastcommit --- mesa-17.3.0~git20171121+17.3.d1e6cf46/.lastcommit 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/.lastcommit 2017-12-12 07:37:18.000000000 +0000 @@ -1 +1 @@ -commit d1e6cf4639cd771c5896fb82d549cf5c5681a9f8 +commit 49a612d1580b3316392273a069d20d93967126a8 diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/ast_function.cpp mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/ast_function.cpp --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/ast_function.cpp 2017-11-21 10:19:31.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/ast_function.cpp 2017-12-12 07:37:17.000000000 +0000 @@ -227,19 +227,28 @@ val = ((ir_swizzle *)val)->val; } - while (val->ir_type == ir_type_dereference_array) { - val = ((ir_dereference_array *)val)->array; + for (;;) { + if (val->ir_type == ir_type_dereference_array) { + val = ((ir_dereference_array *)val)->array; + } else if (val->ir_type == ir_type_dereference_record && + !state->es_shader) { + val = ((ir_dereference_record *)val)->record; + } else + break; } - if (!val->as_dereference_variable() || - val->variable_referenced()->data.mode != ir_var_shader_in) { + ir_variable *var = NULL; + if (const ir_dereference_variable *deref_var = val->as_dereference_variable()) + var = deref_var->variable_referenced(); + + if (!var || var->data.mode != ir_var_shader_in) { _mesa_glsl_error(&loc, state, "parameter `%s` must be a shader input", formal->name); return false; } - val->variable_referenced()->data.must_be_shader_input = 1; + var->data.must_be_shader_input = 1; } /* Verify that 'out' and 'inout' actual parameters are lvalues. 
*/ @@ -667,8 +676,13 @@ ir_variable *sub_var = NULL; *function_name = array->primary_expression.identifier; - match_subroutine_by_name(*function_name, actual_parameters, - state, &sub_var); + if (!match_subroutine_by_name(*function_name, actual_parameters, + state, &sub_var)) { + _mesa_glsl_error(&loc, state, "Unknown subroutine `%s'", + *function_name); + *function_name = NULL; /* indicate error condition to caller */ + return NULL; + } ir_rvalue *outer_array_idx = idx->hir(instructions, state); return new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx); diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/glsl_parser_extras.cpp mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/glsl_parser_extras.cpp --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/glsl_parser_extras.cpp 2017-11-21 10:19:31.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/glsl_parser_extras.cpp 2017-12-12 07:37:17.000000000 +0000 @@ -1863,6 +1863,49 @@ shader->bound_image = state->bound_image_specified; } +/* src can be NULL if only the symbols found in the exec_list should be + * copied + */ +void +_mesa_glsl_copy_symbols_from_table(struct exec_list *shader_ir, + struct glsl_symbol_table *src, + struct glsl_symbol_table *dest) +{ + foreach_in_list (ir_instruction, ir, shader_ir) { + switch (ir->ir_type) { + case ir_type_function: + dest->add_function((ir_function *) ir); + break; + case ir_type_variable: { + ir_variable *const var = (ir_variable *) ir; + + if (var->data.mode != ir_var_temporary) + dest->add_variable(var); + break; + } + default: + break; + } + } + + if (src != NULL) { + /* Explicitly copy the gl_PerVertex interface definitions because these + * are needed to check they are the same during the interstage link. + * They can’t necessarily be found via the exec_list because the members + * might not be referenced. The GL spec still requires that they match + * in that case. 
+ */ + const glsl_type *iface = + src->get_interface("gl_PerVertex", ir_var_shader_in); + if (iface) + dest->add_interface(iface->name, iface, ir_var_shader_in); + + iface = src->get_interface("gl_PerVertex", ir_var_shader_out); + if (iface) + dest->add_interface(iface->name, iface, ir_var_shader_out); + } +} + extern "C" { static void @@ -1936,6 +1979,7 @@ static void opt_shader_and_create_symbol_table(struct gl_context *ctx, + struct glsl_symbol_table *source_symbols, struct gl_shader *shader) { assert(shader->CompileStatus != compile_failure && @@ -1993,22 +2037,8 @@ * We don't have to worry about types or interface-types here because those * are fly-weights that are looked up by glsl_type. */ - foreach_in_list (ir_instruction, ir, shader->ir) { - switch (ir->ir_type) { - case ir_type_function: - shader->symbols->add_function((ir_function *) ir); - break; - case ir_type_variable: { - ir_variable *const var = (ir_variable *) ir; - - if (var->data.mode != ir_var_temporary) - shader->symbols->add_variable(var); - break; - } - default: - break; - } - } + _mesa_glsl_copy_symbols_from_table(shader->ir, source_symbols, + shader->symbols); } void @@ -2045,7 +2075,9 @@ return; if (shader->CompileStatus == compiled_no_opts) { - opt_shader_and_create_symbol_table(ctx, shader); + opt_shader_and_create_symbol_table(ctx, + NULL, /* source_symbols */ + shader); shader->CompileStatus = compile_success; return; } @@ -2106,7 +2138,7 @@ lower_subroutine(shader->ir, state); if (!ctx->Cache || force_recompile) - opt_shader_and_create_symbol_table(ctx, shader); + opt_shader_and_create_symbol_table(ctx, state->symbols, shader); else { reparent_ir(shader->ir, shader->ir); shader->CompileStatus = compiled_no_opts; diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/glsl_parser_extras.h mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/glsl_parser_extras.h --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/glsl_parser_extras.h 2017-11-21 10:19:31.000000000 
+0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/glsl_parser_extras.h 2017-12-12 07:37:17.000000000 +0000 @@ -948,6 +948,11 @@ extern void _mesa_destroy_shader_compiler(void); extern void _mesa_destroy_shader_compiler_caches(void); +extern void +_mesa_glsl_copy_symbols_from_table(struct exec_list *shader_ir, + struct glsl_symbol_table *src, + struct glsl_symbol_table *dest); + #ifdef __cplusplus } #endif diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/linker.cpp mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/linker.cpp --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/linker.cpp 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/linker.cpp 2017-12-12 07:37:17.000000000 +0000 @@ -1261,21 +1261,11 @@ * Populates a shaders symbol table with all global declarations */ static void -populate_symbol_table(gl_linked_shader *sh) +populate_symbol_table(gl_linked_shader *sh, glsl_symbol_table *symbols) { sh->symbols = new(sh) glsl_symbol_table; - foreach_in_list(ir_instruction, inst, sh->ir) { - ir_variable *var; - ir_function *func; - - if ((func = inst->as_function()) != NULL) { - sh->symbols->add_function(func); - } else if ((var = inst->as_variable()) != NULL) { - if (var->data.mode != ir_var_temporary) - sh->symbols->add_variable(var); - } - } + _mesa_glsl_copy_symbols_from_table(sh->ir, symbols, sh->symbols); } @@ -2293,7 +2283,7 @@ link_bindless_layout_qualifiers(prog, shader_list, num_shaders); - populate_symbol_table(linked); + populate_symbol_table(linked, shader_list[0]->symbols); /* The pointer to the main function in the final linked shader (i.e., the * copy of the original shader that contained the main function). 
diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/link_interface_blocks.cpp mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/link_interface_blocks.cpp --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/link_interface_blocks.cpp 2017-02-12 09:42:57.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/link_interface_blocks.cpp 2017-12-12 07:37:17.000000000 +0000 @@ -364,6 +364,35 @@ consumer->Stage != MESA_SHADER_FRAGMENT) || consumer->Stage == MESA_SHADER_GEOMETRY; + /* Check that block re-declarations of gl_PerVertex are compatible + * across shaders: From OpenGL Shading Language 4.5, section + * "7.1 Built-In Language Variables", page 130 of the PDF: + * + * "If multiple shaders using members of a built-in block belonging + * to the same interface are linked together in the same program, + * they must all redeclare the built-in block in the same way, as + * described in section 4.3.9 “Interface Blocks” for interface-block + * matching, or a link-time error will result." + * + * This is done explicitly outside of iterating the member variable + * declarations because it is possible that the variables are not used and + * so they would have been optimised out. + */ + const glsl_type *consumer_iface = + consumer->symbols->get_interface("gl_PerVertex", + ir_var_shader_in); + + const glsl_type *producer_iface = + producer->symbols->get_interface("gl_PerVertex", + ir_var_shader_out); + + if (producer_iface && consumer_iface && + interstage_member_mismatch(prog, consumer_iface, producer_iface)) { + linker_error(prog, "Incompatible or missing gl_PerVertex re-declaration " + "in consecutive shaders"); + return; + } + /* Add output interfaces from the producer to the symbol table. 
*/ foreach_in_list(ir_instruction, node, producer->ir) { ir_variable *var = node->as_variable(); diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/lower_named_interface_blocks.cpp mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/lower_named_interface_blocks.cpp --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/lower_named_interface_blocks.cpp 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/lower_named_interface_blocks.cpp 2017-12-12 07:37:17.000000000 +0000 @@ -115,6 +115,7 @@ void run(exec_list *instructions); virtual ir_visitor_status visit_leave(ir_assignment *); + virtual ir_visitor_status visit_leave(ir_expression *); virtual void handle_rvalue(ir_rvalue **rvalue); }; @@ -238,6 +239,23 @@ return rvalue_visit(ir); } +ir_visitor_status +flatten_named_interface_blocks_declarations::visit_leave(ir_expression *ir) +{ + ir_visitor_status status = rvalue_visit(ir); + + if (ir->operation == ir_unop_interpolate_at_centroid || + ir->operation == ir_binop_interpolate_at_offset || + ir->operation == ir_binop_interpolate_at_sample) { + const ir_rvalue *val = ir->operands[0]; + + /* This disables varying packing for this input. 
*/ + val->variable_referenced()->data.must_be_shader_input = 1; + } + + return status; +} + void flatten_named_interface_blocks_declarations::handle_rvalue(ir_rvalue **rvalue) { diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp 2017-12-12 07:37:17.000000000 +0000 @@ -128,7 +128,36 @@ { ir_expression *const expr = ir->as_expression(); - if (expr == NULL || expr->operation != ir_binop_vector_extract) + if (expr == NULL) + return ir; + + if (expr->operation == ir_unop_interpolate_at_centroid || + expr->operation == ir_binop_interpolate_at_offset || + expr->operation == ir_binop_interpolate_at_sample) { + /* Lower interpolateAtXxx(some_vec[idx], ...) to + * interpolateAtXxx(some_vec, ...)[idx] before lowering to conditional + * assignments, to maintain the rule that the interpolant is an l-value + * referring to a (part of a) shader input. + * + * This is required when idx is dynamic (otherwise it gets lowered to + * a swizzle). 
+ */ + ir_expression *const interpolant = expr->operands[0]->as_expression(); + if (!interpolant || interpolant->operation != ir_binop_vector_extract) + return ir; + + ir_rvalue *vec_input = interpolant->operands[0]; + ir_expression *const vec_interpolate = + new(base_ir) ir_expression(expr->operation, vec_input->type, + vec_input, expr->operands[1]); + + return convert_vec_index_to_cond_assign(ralloc_parent(ir), + vec_interpolate, + interpolant->operands[1], + ir->type); + } + + if (expr->operation != ir_binop_vector_extract) return ir; return convert_vec_index_to_cond_assign(ralloc_parent(ir), diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/ddebug/dd_draw.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/ddebug/dd_draw.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/ddebug/dd_draw.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/ddebug/dd_draw.c 2017-12-12 07:37:17.000000000 +0000 @@ -847,7 +847,7 @@ } dst->num_so_targets = src->num_so_targets; - for (i = 0; i < ARRAY_SIZE(src->so_targets); i++) + for (i = 0; i < src->num_so_targets; i++) pipe_so_target_reference(&dst->so_targets[i], src->so_targets[i]); memcpy(dst->so_offsets, src->so_offsets, sizeof(src->so_offsets)); diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/eg_asm.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/eg_asm.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/eg_asm.c 2017-08-14 14:37:28.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/eg_asm.c 2017-12-12 07:37:17.000000000 +0000 @@ -71,9 +71,12 @@ } else if (cfop->flags & CF_CLAUSE) { /* CF_TEX/VTX (CF_ALU already handled above) */ bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); - bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) | + bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) | S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 
1); + if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ + bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); + id++; } else if (cfop->flags & CF_EXP) { /* EXPORT instructions */ bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | @@ -111,12 +114,14 @@ } else { /* other instructions */ bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); - bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode)| + bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) | S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COND(cf->cond) | S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) | - S_SQ_CF_WORD1_COUNT(cf->count) | - S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); + S_SQ_CF_WORD1_COUNT(cf->count); + if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ + bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); + id++; } } return 0; diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/evergreen_compute.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/evergreen_compute.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/evergreen_compute.c 2017-08-14 14:37:28.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/evergreen_compute.c 2017-12-12 07:37:17.000000000 +0000 @@ -746,8 +746,9 @@ radeon_compute_set_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3); radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */ radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */ - S_0288D4_NUM_GPRS(ngpr) - | S_0288D4_STACK_SIZE(nstack)); + S_0288D4_NUM_GPRS(ngpr) | + S_0288D4_DX10_CLAMP(1) | + S_0288D4_STACK_SIZE(nstack)); radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */ radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0)); diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/evergreen_state.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/evergreen_state.c --- 
mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/evergreen_state.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/evergreen_state.c 2017-12-12 07:37:17.000000000 +0000 @@ -3232,6 +3232,7 @@ r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */ S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | + S_028844_DX10_CLAMP(1) | S_028844_STACK_SIZE(rshader->bc.nstack)); /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ @@ -3252,6 +3253,7 @@ r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES, S_028890_NUM_GPRS(rshader->bc.ngpr) | + S_028890_DX10_CLAMP(1) | S_028890_STACK_SIZE(rshader->bc.nstack)); r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES, shader->bo->gpu_address >> 8); @@ -3314,6 +3316,7 @@ r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS, S_028878_NUM_GPRS(rshader->bc.ngpr) | + S_028878_DX10_CLAMP(1) | S_028878_STACK_SIZE(rshader->bc.nstack)); r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS, shader->bo->gpu_address >> 8); @@ -3354,6 +3357,7 @@ S_0286C4_VS_EXPORT_COUNT(nparams - 1)); r600_store_context_reg(cb, R_028860_SQ_PGM_RESOURCES_VS, S_028860_NUM_GPRS(rshader->bc.ngpr) | + S_028860_DX10_CLAMP(1) | S_028860_STACK_SIZE(rshader->bc.nstack)); if (rshader->vs_position_window_space) { r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, @@ -3388,6 +3392,7 @@ r600_init_command_buffer(cb, 32); r600_store_context_reg(cb, R_0288BC_SQ_PGM_RESOURCES_HS, S_0288BC_NUM_GPRS(rshader->bc.ngpr) | + S_0288BC_DX10_CLAMP(1) | S_0288BC_STACK_SIZE(rshader->bc.nstack)); r600_store_context_reg(cb, R_0288B8_SQ_PGM_START_HS, shader->bo->gpu_address >> 8); @@ -3401,6 +3406,7 @@ r600_init_command_buffer(cb, 32); r600_store_context_reg(cb, R_0288D4_SQ_PGM_RESOURCES_LS, S_0288D4_NUM_GPRS(rshader->bc.ngpr) | + S_0288D4_DX10_CLAMP(1) | S_0288D4_STACK_SIZE(rshader->bc.nstack)); r600_store_context_reg(cb, R_0288D0_SQ_PGM_START_LS, 
shader->bo->gpu_address >> 8); diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/r600_asm.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/r600_asm.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/r600_asm.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/r600_asm.c 2017-12-12 07:37:17.000000000 +0000 @@ -1625,7 +1625,8 @@ *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); *bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) | S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); + S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)| + S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); } /* common for r600/r700 - eg in eg_asm.c */ @@ -2088,6 +2089,8 @@ bc->bytecode[id + 1], cfop->name); fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr); fprintf(stderr, "\n"); + if (cf->end_of_program) + fprintf(stderr, "EOP "); } else if (cfop->flags & CF_EXP) { int o = 0; const char *exp_type[] = {"PIXEL", "POS ", "PARAM"}; diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/r600_shader.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/r600_shader.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/r600_shader.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/r600_shader.c 2017-12-12 07:37:17.000000000 +0000 @@ -3660,7 +3660,7 @@ last = r600_isa_cf(ctx.bc->cf_last->op); /* alu clause instructions don't have EOP bit, so add NOP */ - if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS) + if (!last || last->flags & CF_ALU) r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); ctx.bc->cf_last->end_of_program = 1; @@ -8853,8 +8853,9 @@ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { 
ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + /* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */ + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, @@ -9051,8 +9052,8 @@ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, @@ -9274,8 +9275,8 @@ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/r600_state.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/r600_state.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/r600_state.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/r600_state.c 2017-12-12 07:37:17.000000000 +0000 @@ -2548,6 +2548,12 @@ 
r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2); r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/ S_028850_NUM_GPRS(rshader->bc.ngpr) | + /* + * docs are misleading about the dx10_clamp bit. This only affects + * instructions using CLAMP dst modifier, in which case they will + * return 0 with this set for a NaN (otherwise NaN). + */ + S_028850_DX10_CLAMP(1) | S_028850_STACK_SIZE(rshader->bc.nstack) | S_028850_UNCACHED_FIRST_INST(ufi)); r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */ @@ -2597,6 +2603,7 @@ S_0286C4_VS_EXPORT_COUNT(nparams - 1)); r600_store_context_reg(cb, R_028868_SQ_PGM_RESOURCES_VS, S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_DX10_CLAMP(1) | S_028868_STACK_SIZE(rshader->bc.nstack)); if (rshader->vs_position_window_space) { r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, @@ -2681,6 +2688,7 @@ r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_GS, S_02887C_NUM_GPRS(rshader->bc.ngpr) | + S_02887C_DX10_CLAMP(1) | S_02887C_STACK_SIZE(rshader->bc.nstack)); r600_store_context_reg(cb, R_02886C_SQ_PGM_START_GS, 0); /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ @@ -2695,6 +2703,7 @@ r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES, S_028890_NUM_GPRS(rshader->bc.ngpr) | + S_028890_DX10_CLAMP(1) | S_028890_STACK_SIZE(rshader->bc.nstack)); r600_store_context_reg(cb, R_028880_SQ_PGM_START_ES, 0); /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). 
*/ diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/r700_asm.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/r700_asm.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/r700_asm.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/r700_asm.c 2017-12-12 07:37:17.000000000 +0000 @@ -30,7 +30,8 @@ *bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R700, cf->op)) | S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COUNT(count) | - S_SQ_CF_WORD1_COUNT_3(count >> 3); + S_SQ_CF_WORD1_COUNT_3(count >> 3)| + S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); } int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 2016-02-23 21:08:27.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 2017-12-12 07:37:17.000000000 +0000 @@ -933,6 +933,11 @@ cf_node *c = static_cast(*I); if (c->jump_after_target) { + if (c->jump_target->next == NULL) { + c->jump_target->insert_after(sh.create_cf(CF_OP_NOP)); + if (last_cf == c->jump_target) + last_cf = static_cast(c->jump_target->next); + } c->jump_target = static_cast(c->jump_target->next); c->jump_after_target = false; } diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/sb/sb_expr.cpp mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/sb/sb_expr.cpp --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/r600/sb/sb_expr.cpp 2016-04-06 13:00:02.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/r600/sb/sb_expr.cpp 2017-12-12 07:37:17.000000000 +0000 @@ -753,7 +753,9 @@ n.bc.src[0].abs == n.bc.src[1].abs) { 
switch (n.bc.op) { case ALU_OP2_MIN: // (MIN x, x) => (MOV x) + case ALU_OP2_MIN_DX10: case ALU_OP2_MAX: + case ALU_OP2_MAX_DX10: convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs); return fold_alu_op1(n); case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2) diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/radeon/r600_texture.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/radeon/r600_texture.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/radeon/r600_texture.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/radeon/r600_texture.c 2017-12-12 07:37:17.000000000 +0000 @@ -2506,8 +2506,11 @@ assert(rtex->resource.b.b.nr_samples <= 1); clear_size = rtex->surface.dcc_size; } else { + unsigned num_layers = util_max_layer(&rtex->resource.b.b, level) + 1; + dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset; - clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size; + clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size * + num_layers; } rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset, clear_size, diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/radeonsi/si_shader.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/radeonsi/si_shader.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/radeonsi/si_shader.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/radeonsi/si_shader.c 2017-12-12 07:37:17.000000000 +0000 @@ -5902,11 +5902,13 @@ key->vs_prolog.num_input_sgprs = num_input_sgprs; key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1; key->vs_prolog.as_ls = shader_out->key.as_ls; + key->vs_prolog.as_es = shader_out->key.as_es; if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) { key->vs_prolog.as_ls = 1; key->vs_prolog.num_merged_next_stage_vgprs = 2; } else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) { + key->vs_prolog.as_es = 
1; key->vs_prolog.num_merged_next_stage_vgprs = 5; } @@ -6787,6 +6789,8 @@ switch (type) { case PIPE_SHADER_VERTEX: + shader.key.as_ls = key->vs_prolog.as_ls; + shader.key.as_es = key->vs_prolog.as_es; break; case PIPE_SHADER_TESS_CTRL: assert(!prolog); @@ -6829,10 +6833,15 @@ static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx) { LLVMValueRef ptr[2], list; + bool is_merged_shader = + ctx->screen->b.chip_class >= GFX9 && + (ctx->type == PIPE_SHADER_TESS_CTRL || + ctx->type == PIPE_SHADER_GEOMETRY || + ctx->shader->key.as_ls || ctx->shader->key.as_es); /* Get the pointer to rw buffers. */ - ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS); - ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI); + ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); + ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS_HI); list = lp_build_gather_values(&ctx->gallivm, ptr, 2); list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, ""); list = LLVMBuildIntToPtr(ctx->ac.builder, list, diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/radeonsi/si_shader.h mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/radeonsi/si_shader.h --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/radeonsi/si_shader.h 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/radeonsi/si_shader.h 2017-12-12 07:37:17.000000000 +0000 @@ -154,9 +154,6 @@ /* SGPR user data indices */ enum { - /* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs, - * and these two are used for other purposes. - */ SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */ SI_SGPR_RW_BUFFERS_HI, SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, @@ -459,6 +456,7 @@ unsigned num_merged_next_stage_vgprs:3; unsigned last_input:4; unsigned as_ls:1; + unsigned as_es:1; /* Prologs for monolithic shaders shouldn't set EXEC. 
*/ unsigned is_monolithic:1; } vs_prolog; diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/radeonsi/si_state.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/radeonsi/si_state.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/drivers/radeonsi/si_state.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/drivers/radeonsi/si_state.c 2017-12-12 07:37:17.000000000 +0000 @@ -4624,7 +4624,7 @@ /* Return if DCC is enabled. The texture should be set up with it * already. */ - if (md->size_metadata >= 11 * 4 && + if (md->size_metadata >= 10 * 4 && /* at least 2(header) + 8(desc) dwords */ md->metadata[0] != 0 && md->metadata[1] == si_get_bo_metadata_word1(rscreen) && G_008F28_COMPRESSION_EN(desc[6])) { diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/state_trackers/wgl/stw_context.c mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/state_trackers/wgl/stw_context.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/gallium/state_trackers/wgl/stw_context.c 2017-08-14 14:37:28.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/gallium/state_trackers/wgl/stw_context.c 2017-12-12 07:37:17.000000000 +0000 @@ -133,6 +133,25 @@ /** + * Return the stw pixel format that most closely matches the pixel format + * on HDC. + * Used to get a pixel format when SetPixelFormat() hasn't been called before. + */ +static int +get_matching_pixel_format(HDC hdc) +{ + int iPixelFormat = GetPixelFormat(hdc); + PIXELFORMATDESCRIPTOR pfd; + + if (!iPixelFormat) + return 0; + if (!DescribePixelFormat(hdc, iPixelFormat, sizeof(pfd), &pfd)) + return 0; + return stw_pixelformat_choose(hdc, &pfd); +} + + +/** * Called via DrvCreateContext(), DrvCreateLayerContext() and * wglCreateContextAttribsARB() to actually create a rendering context. 
* \param handle the desired DHGLRC handle to use for the context, or zero @@ -174,7 +193,7 @@ * but not all do, and the opengl32 runtime seems to use a default * pixel format in some cases, so use that. */ - iPixelFormat = GetPixelFormat(hdc); + iPixelFormat = get_matching_pixel_format(hdc); if (!iPixelFormat) return 0; } @@ -458,7 +477,7 @@ * pixel format in some cases, so we must create a framebuffer for * those here. */ - int iPixelFormat = GetPixelFormat(hDrawDC); + int iPixelFormat = get_matching_pixel_format(hDrawDC); if (iPixelFormat) fb = stw_framebuffer_create( hDrawDC, iPixelFormat ); if (!fb) diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/git_sha1.h mesa-17.3.0~git20171212+17.3.49a612d1/src/git_sha1.h --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/git_sha1.h 2017-11-21 10:19:33.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/git_sha1.h 2017-12-12 07:37:18.000000000 +0000 @@ -1 +1 @@ -#define MESA_GIT_SHA1 "git-d1e6cf4639" +#define MESA_GIT_SHA1 "git-49a612d158" diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/intel/blorp/blorp_blit.c mesa-17.3.0~git20171212+17.3.49a612d1/src/intel/blorp/blorp_blit.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/intel/blorp/blorp_blit.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/intel/blorp/blorp_blit.c 2017-12-12 07:37:17.000000000 +0000 @@ -2344,7 +2344,7 @@ */ blorp_surf_convert_to_single_slice(isl_dev, info); - if (width || height) { + if (width && height) { #ifndef NDEBUG uint32_t right_edge_px = info->tile_x_sa + *x + *width; uint32_t bottom_edge_px = info->tile_y_sa + *y + *height; @@ -2357,7 +2357,7 @@ *height = DIV_ROUND_UP(*height, fmtl->bh); } - if (x || y) { + if (x && y) { assert(*x % fmtl->bw == 0); assert(*y % fmtl->bh == 0); *x /= fmtl->bw; diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/intel/compiler/brw_fs_saturate_propagation.cpp mesa-17.3.0~git20171212+17.3.49a612d1/src/intel/compiler/brw_fs_saturate_propagation.cpp --- 
mesa-17.3.0~git20171121+17.3.d1e6cf46/src/intel/compiler/brw_fs_saturate_propagation.cpp 2017-05-24 05:45:46.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/intel/compiler/brw_fs_saturate_propagation.cpp 2017-12-12 07:37:17.000000000 +0000 @@ -88,8 +88,14 @@ scan_inst->src[0].negate = !scan_inst->src[0].negate; inst->src[0].negate = false; } else if (scan_inst->opcode == BRW_OPCODE_MAD) { - scan_inst->src[0].negate = !scan_inst->src[0].negate; - scan_inst->src[1].negate = !scan_inst->src[1].negate; + for (int i = 0; i < 2; i++) { + if (scan_inst->src[i].file == IMM) { + brw_negate_immediate(scan_inst->src[i].type, + &scan_inst->src[i].as_brw_reg()); + } else { + scan_inst->src[i].negate = !scan_inst->src[i].negate; + } + } inst->src[0].negate = false; } else if (scan_inst->opcode == BRW_OPCODE_ADD) { if (scan_inst->src[1].file == IMM) { diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/intel/vulkan/anv_allocator.c mesa-17.3.0~git20171212+17.3.49a612d1/src/intel/vulkan/anv_allocator.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/intel/vulkan/anv_allocator.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/intel/vulkan/anv_allocator.c 2017-12-12 07:37:17.000000000 +0000 @@ -131,11 +131,13 @@ return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); } +#ifndef HAVE_MEMFD_CREATE static inline int memfd_create(const char *name, unsigned int flags) { return syscall(SYS_memfd_create, name, flags); } +#endif static inline uint32_t ilog2_round_up(uint32_t value) diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/intel/vulkan/anv_gem_stubs.c mesa-17.3.0~git20171212+17.3.49a612d1/src/intel/vulkan/anv_gem_stubs.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/intel/vulkan/anv_gem_stubs.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/intel/vulkan/anv_gem_stubs.c 2017-12-12 07:37:17.000000000 +0000 @@ -27,11 +27,13 @@ #include "anv_private.h" +#ifndef HAVE_MEMFD_CREATE static inline int 
memfd_create(const char *name, unsigned int flags) { return syscall(SYS_memfd_create, name, flags); } +#endif uint32_t anv_gem_create(struct anv_device *device, uint64_t size) diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mapi/glapi/gen/gl_table.py mesa-17.3.0~git20171212+17.3.49a612d1/src/mapi/glapi/gen/gl_table.py --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mapi/glapi/gen/gl_table.py 2017-08-14 14:37:28.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mapi/glapi/gen/gl_table.py 2017-12-12 07:37:17.000000000 +0000 @@ -58,12 +58,20 @@ print '#endif' print '' print '' + print '#ifdef __cplusplus' + print 'extern "C" {' + print '#endif' + print '' print 'struct _glapi_table' print '{' return def printRealFooter(self): print '};' + print '' + print '#ifdef __cplusplus' + print '}' + print '#endif' return diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_bufmgr.c mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_bufmgr.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_bufmgr.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_bufmgr.c 2017-12-12 07:37:17.000000000 +0000 @@ -1177,8 +1177,8 @@ return NULL; } -int -brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd) +static void +brw_bo_make_external(struct brw_bo *bo) { struct brw_bufmgr *bufmgr = bo->bufmgr; @@ -1190,6 +1190,14 @@ } mtx_unlock(&bufmgr->lock); } +} + +int +brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd) +{ + struct brw_bufmgr *bufmgr = bo->bufmgr; + + brw_bo_make_external(bo); if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle, DRM_CLOEXEC, prime_fd) != 0) @@ -1200,6 +1208,14 @@ return 0; } +uint32_t +brw_bo_export_gem_handle(struct brw_bo *bo) +{ + brw_bo_make_external(bo); + + return bo->gem_handle; +} + int brw_bo_flink(struct brw_bo *bo, uint32_t *name) { @@ -1213,11 +1229,8 @@ if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink)) 
return -errno; + brw_bo_make_external(bo); mtx_lock(&bufmgr->lock); - if (!bo->external) { - _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); - bo->external = true; - } if (!bo->global_name) { bo->global_name = flink.name; _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo); diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_bufmgr.h mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_bufmgr.h --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_bufmgr.h 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_bufmgr.h 2017-12-12 07:37:17.000000000 +0000 @@ -337,6 +337,8 @@ struct brw_bo *brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, int prime_fd); +uint32_t brw_bo_export_gem_handle(struct brw_bo *bo); + int brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, uint64_t *result); diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_context.h mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_context.h --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_context.h 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_context.h 2017-12-12 07:37:17.000000000 +0000 @@ -442,23 +442,26 @@ int reloc_array_size; }; +struct brw_growing_bo { + struct brw_bo *bo; + uint32_t *map; + uint32_t *cpu_map; +}; + struct intel_batchbuffer { /** Current batchbuffer being queued up. */ - struct brw_bo *bo; - /** Last BO submitted to the hardware. Used for glFinish(). */ - struct brw_bo *last_bo; + struct brw_growing_bo batch; /** Current statebuffer being queued up. */ - struct brw_bo *state_bo; + struct brw_growing_bo state; + + /** Last batchbuffer submitted to the hardware. Used for glFinish(). 
*/ + struct brw_bo *last_bo; #ifdef DEBUG uint16_t emit, total; #endif uint16_t reserved_space; uint32_t *map_next; - uint32_t *map; - uint32_t *batch_cpu_map; - uint32_t *state_cpu_map; - uint32_t *state_map; uint32_t state_used; enum brw_gpu_ring ring; diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_meta_util.c mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_meta_util.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_meta_util.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_meta_util.c 2017-12-12 07:37:17.000000000 +0000 @@ -293,6 +293,17 @@ brw->mesa_to_isl_render_format[mt->format]) return false; + /* Gen9 doesn't support fast clear on single-sampled SRGB buffers. When + * GL_FRAMEBUFFER_SRGB is enabled any color renderbuffers will be + * resolved in intel_update_state. In that case it's pointless to do a + * fast clear because it's very likely to be immediately resolved. 
+ */ + if (devinfo->gen >= 9 && + mt->surf.samples == 1 && + ctx->Color.sRGBEnabled && + _mesa_get_srgb_format_linear(mt->format) != mt->format) + return false; + const mesa_format format = _mesa_get_render_format(ctx, mt->format); if (_mesa_is_format_integer_color(format)) { if (devinfo->gen >= 8) { diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_misc_state.c mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_misc_state.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_misc_state.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_misc_state.c 2017-12-12 07:37:17.000000000 +0000 @@ -65,15 +65,15 @@ BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2)); - OUT_RELOC(brw->batch.state_bo, 0, brw->vs.base.state_offset); + OUT_RELOC(brw->batch.state.bo, 0, brw->vs.base.state_offset); if (brw->ff_gs.prog_active) - OUT_RELOC(brw->batch.state_bo, 0, brw->ff_gs.state_offset | 1); + OUT_RELOC(brw->batch.state.bo, 0, brw->ff_gs.state_offset | 1); else OUT_BATCH(0); - OUT_RELOC(brw->batch.state_bo, 0, brw->clip.state_offset | 1); - OUT_RELOC(brw->batch.state_bo, 0, brw->sf.state_offset); - OUT_RELOC(brw->batch.state_bo, 0, brw->wm.base.state_offset); - OUT_RELOC(brw->batch.state_bo, 0, brw->cc.state_offset); + OUT_RELOC(brw->batch.state.bo, 0, brw->clip.state_offset | 1); + OUT_RELOC(brw->batch.state.bo, 0, brw->sf.state_offset); + OUT_RELOC(brw->batch.state.bo, 0, brw->wm.base.state_offset); + OUT_RELOC(brw->batch.state.bo, 0, brw->cc.state_offset); ADVANCE_BATCH(); brw->ctx.NewDriverState |= BRW_NEW_PSP; @@ -629,9 +629,9 @@ OUT_BATCH(0); OUT_BATCH(mocs_wb << 16); /* Surface state base address: */ - OUT_RELOC64(brw->batch.state_bo, 0, mocs_wb << 4 | 1); + OUT_RELOC64(brw->batch.state.bo, 0, mocs_wb << 4 | 1); /* Dynamic state base address: */ - OUT_RELOC64(brw->batch.state_bo, 0, mocs_wb << 4 | 1); + OUT_RELOC64(brw->batch.state.bo, 
0, mocs_wb << 4 | 1); /* Indirect object base address: MEDIA_OBJECT data */ OUT_BATCH(mocs_wb << 4 | 1); OUT_BATCH(0); @@ -641,7 +641,7 @@ /* General state buffer size */ OUT_BATCH(0xfffff001); /* Dynamic state buffer size */ - OUT_BATCH(ALIGN(brw->batch.state_bo->size, 4096) | 1); + OUT_BATCH(ALIGN(MAX_STATE_SIZE, 4096) | 1); /* Indirect object upper bound */ OUT_BATCH(0xfffff001); /* Instruction access upper bound */ @@ -664,7 +664,7 @@ * BINDING_TABLE_STATE * SURFACE_STATE */ - OUT_RELOC(brw->batch.state_bo, 0, 1); + OUT_RELOC(brw->batch.state.bo, 0, 1); /* Dynamic state base address: * SAMPLER_STATE * SAMPLER_BORDER_COLOR_STATE @@ -675,7 +675,7 @@ * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset * Disable is clear, which we rely on) */ - OUT_RELOC(brw->batch.state_bo, 0, 1); + OUT_RELOC(brw->batch.state.bo, 0, 1); OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */ @@ -696,7 +696,7 @@ BEGIN_BATCH(8); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_RELOC(brw->batch.state_bo, 0, 1); /* Surface state base address */ + OUT_RELOC(brw->batch.state.bo, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_RELOC(brw->cache.bo, 0, 1); /* Instruction base address */ OUT_BATCH(0xfffff001); /* General state upper bound */ @@ -707,7 +707,7 @@ BEGIN_BATCH(6); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_RELOC(brw->batch.state_bo, 0, 1); /* Surface state base address */ + OUT_RELOC(brw->batch.state.bo, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_sync.c mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_sync.c --- 
mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/brw_sync.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/brw_sync.c 2017-12-12 07:37:17.000000000 +0000 @@ -146,7 +146,7 @@ assert(!fence->batch_bo); assert(!fence->signalled); - fence->batch_bo = brw->batch.bo; + fence->batch_bo = brw->batch.batch.bo; brw_bo_reference(fence->batch_bo); if (intel_batchbuffer_flush(brw) < 0) { diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/gen4_blorp_exec.h mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/gen4_blorp_exec.h --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/gen4_blorp_exec.h 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/gen4_blorp_exec.h 2017-12-12 07:37:17.000000000 +0000 @@ -28,7 +28,7 @@ struct brw_context *brw = batch->driver_batch; return (struct blorp_address) { - .buffer = brw->batch.state_bo, + .buffer = brw->batch.state.bo, .offset = offset, }; } diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/genX_blorp_exec.c mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/genX_blorp_exec.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/genX_blorp_exec.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/genX_blorp_exec.c 2017-12-12 07:37:17.000000000 +0000 @@ -60,7 +60,7 @@ uint32_t offset; if (GEN_GEN < 6 && brw_ptr_in_state_buffer(&brw->batch, location)) { - offset = (char *)location - (char *)brw->batch.state_map; + offset = (char *)location - (char *)brw->batch.state.map; return brw_state_reloc(&brw->batch, offset, address.buffer, address.offset + delta, address.reloc_flags); @@ -68,7 +68,7 @@ assert(!brw_ptr_in_state_buffer(&brw->batch, location)); - offset = (char *)location - (char *)brw->batch.map; + offset = (char *)location - (char *)brw->batch.batch.map; return 
brw_batch_reloc(&brw->batch, offset, address.buffer, address.offset + delta, address.reloc_flags); @@ -86,7 +86,7 @@ brw_state_reloc(&brw->batch, ss_offset, bo, address.offset + delta, address.reloc_flags); - void *reloc_ptr = (void *)brw->batch.state_map + ss_offset; + void *reloc_ptr = (void *)brw->batch.state.map + ss_offset; #if GEN_GEN >= 8 *(uint64_t *)reloc_ptr = reloc_val; #else @@ -150,7 +150,7 @@ void *data = brw_state_batch(brw, size, 64, &offset); *addr = (struct blorp_address) { - .buffer = brw->batch.state_bo, + .buffer = brw->batch.state.bo, .offset = offset, #if GEN_GEN == 10 diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/genX_state_upload.c mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/genX_state_upload.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/genX_state_upload.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/genX_state_upload.c 2017-12-12 07:37:17.000000000 +0000 @@ -89,7 +89,7 @@ return address.offset + delta; } else { if (GEN_GEN < 6 && brw_ptr_in_state_buffer(batch, location)) { - offset = (char *) location - (char *) brw->batch.state_map; + offset = (char *) location - (char *) brw->batch.state.map; return brw_state_reloc(batch, offset, address.bo, address.offset + delta, address.reloc_flags); @@ -97,7 +97,7 @@ assert(!brw_ptr_in_state_buffer(batch, location)); - offset = (char *) location - (char *) brw->batch.map; + offset = (char *) location - (char *) brw->batch.batch.map; return brw_batch_reloc(batch, offset, address.bo, address.offset + delta, address.reloc_flags); @@ -1279,7 +1279,7 @@ clip.GuardbandClipTestEnable = true; clip.ClipperViewportStatePointer = - ro_bo(brw->batch.state_bo, brw->clip.vp_offset); + ro_bo(brw->batch.state.bo, brw->clip.vp_offset); clip.ScreenSpaceViewportXMin = -1; clip.ScreenSpaceViewportXMax = 1; @@ -1496,7 +1496,7 @@ * domain. 
*/ sf.SetupViewportStateOffset = - ro_bo(brw->batch.state_bo, brw->sf.vp_offset); + ro_bo(brw->batch.state.bo, brw->sf.vp_offset); sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT; @@ -1789,7 +1789,7 @@ if (stage_state->sampler_count) wm.SamplerStatePointer = - ro_bo(brw->batch.state_bo, stage_state->sampler_offset); + ro_bo(brw->batch.state.bo, stage_state->sampler_offset); #if GEN_GEN == 5 if (wm_prog_data->prog_offset_2) wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2; @@ -2082,7 +2082,7 @@ vs.StatisticsEnable = false; vs.SamplerStatePointer = - ro_bo(brw->batch.state_bo, stage_state->sampler_offset); + ro_bo(brw->batch.state.bo, stage_state->sampler_offset); #endif #if GEN_GEN == 5 @@ -3331,7 +3331,7 @@ cc.StatisticsEnable = brw->stats_wm; cc.CCViewportStatePointer = - ro_bo(brw->batch.state_bo, brw->cc.vp_offset); + ro_bo(brw->batch.state.bo, brw->cc.vp_offset); #else /* _NEW_COLOR */ cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0]; @@ -4332,7 +4332,7 @@ raster.CullMode = CULLMODE_NONE; } - point->SmoothFlag = raster.SmoothPointEnable; + raster.SmoothPointEnable = point->SmoothFlag; raster.DXMultisampleRasterizationEnable = _mesa_is_multisample_enabled(ctx); @@ -5083,7 +5083,7 @@ } #if GEN_GEN < 6 samp_st.BorderColorPointer = - ro_bo(brw->batch.state_bo, border_color_offset); + ro_bo(brw->batch.state.bo, border_color_offset); #else samp_st.BorderColorPointer = border_color_offset; #endif diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/intel_batchbuffer.c mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/intel_batchbuffer.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/intel_batchbuffer.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/intel_batchbuffer.c 2017-12-12 07:37:17.000000000 +0000 @@ -52,15 +52,6 @@ #define BATCH_SZ (20 * 1024) #define STATE_SZ (16 * 1024) -/* The kernel assumes batchbuffers are smaller than 256kB. 
*/ -#define MAX_BATCH_SIZE (256 * 1024) - -/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base - * Address, which means that we can't put binding tables beyond 64kB. This - * effectively limits the maximum statebuffer size to 64kB. - */ -#define MAX_STATE_SIZE (64 * 1024) - static void intel_batchbuffer_reset(struct brw_context *brw); @@ -93,11 +84,11 @@ const struct gen_device_info *devinfo = &screen->devinfo; if (!devinfo->has_llc) { - batch->batch_cpu_map = malloc(BATCH_SZ); - batch->map = batch->batch_cpu_map; - batch->map_next = batch->map; - batch->state_cpu_map = malloc(STATE_SZ); - batch->state_map = batch->state_cpu_map; + batch->batch.cpu_map = malloc(BATCH_SZ); + batch->batch.map = batch->batch.cpu_map; + batch->map_next = batch->batch.map; + batch->state.cpu_map = malloc(STATE_SZ); + batch->state.map = batch->state.cpu_map; } init_reloc_list(&batch->batch_relocs, 250); @@ -180,20 +171,21 @@ brw_bo_unreference(batch->last_bo); batch->last_bo = NULL; } - batch->last_bo = batch->bo; + batch->last_bo = batch->batch.bo; - batch->bo = brw_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096); - if (!batch->batch_cpu_map) { - batch->map = brw_bo_map(brw, batch->bo, MAP_READ | MAP_WRITE); + batch->batch.bo = brw_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096); + if (!batch->batch.cpu_map) { + batch->batch.map = + brw_bo_map(brw, batch->batch.bo, MAP_READ | MAP_WRITE); } - batch->map_next = batch->map; + batch->map_next = batch->batch.map; - batch->state_bo = brw_bo_alloc(bufmgr, "statebuffer", STATE_SZ, 4096); - batch->state_bo->kflags = + batch->state.bo = brw_bo_alloc(bufmgr, "statebuffer", STATE_SZ, 4096); + batch->state.bo->kflags = can_do_exec_capture(screen) ? 
EXEC_OBJECT_CAPTURE : 0; - if (!batch->state_cpu_map) { - batch->state_map = - brw_bo_map(brw, batch->state_bo, MAP_READ | MAP_WRITE); + if (!batch->state.cpu_map) { + batch->state.map = + brw_bo_map(brw, batch->state.bo, MAP_READ | MAP_WRITE); } /* Avoid making 0 a valid state offset - otherwise the decoder will try @@ -201,8 +193,8 @@ */ batch->state_used = 1; - add_exec_bo(batch, batch->bo); - assert(batch->bo->index == 0); + add_exec_bo(batch, batch->batch.bo); + assert(batch->batch.bo->index == 0); batch->needs_sol_reset = false; batch->state_base_address_emitted = false; @@ -251,8 +243,8 @@ void intel_batchbuffer_free(struct intel_batchbuffer *batch) { - free(batch->batch_cpu_map); - free(batch->state_cpu_map); + free(batch->batch.cpu_map); + free(batch->state.cpu_map); for (int i = 0; i < batch->exec_count; i++) { brw_bo_unreference(batch->exec_bos[i]); @@ -263,8 +255,8 @@ free(batch->validation_list); brw_bo_unreference(batch->last_bo); - brw_bo_unreference(batch->bo); - brw_bo_unreference(batch->state_bo); + brw_bo_unreference(batch->batch.bo); + brw_bo_unreference(batch->state.bo); if (batch->state_batch_sizes) _mesa_hash_table_destroy(batch->state_batch_sizes, NULL); } @@ -302,7 +294,8 @@ uint32_t *old_map = *map_ptr; struct brw_bo *old_bo = *bo_ptr; - struct brw_bo *new_bo = brw_bo_alloc(bufmgr, old_bo->name, new_size, 4096); + struct brw_bo *new_bo = + brw_bo_alloc(bufmgr, old_bo->name, new_size, old_bo->align); uint32_t *new_map; perf_debug("Growing %s - ran out of space\n", old_bo->name); @@ -321,9 +314,12 @@ * This guarantees that our relocations continue to work: values we've * already written into the buffer, values we're going to write into the * buffer, and the validation/relocation lists all will match. + * + * Also preserve kflags for EXEC_OBJECT_CAPTURE. 
*/ new_bo->gtt_offset = old_bo->gtt_offset; new_bo->index = old_bo->index; + new_bo->kflags = old_bo->kflags; /* Batch/state buffers are per-context, and if we've run out of space, * we must have actually used them before, so...they will be in the list. @@ -370,17 +366,16 @@ } const unsigned batch_used = USED_BATCH(*batch) * 4; - if (batch_used + sz >= BATCH_SZ) { - if (!batch->no_wrap) { - intel_batchbuffer_flush(brw); - } else { - const unsigned new_size = - MIN2(batch->bo->size + batch->bo->size / 2, MAX_BATCH_SIZE); - grow_buffer(brw, &batch->bo, &batch->map, &batch->batch_cpu_map, - batch_used, new_size); - batch->map_next = (void *) batch->map + batch_used; - assert(batch_used + sz < batch->bo->size); - } + if (batch_used + sz >= BATCH_SZ && !batch->no_wrap) { + intel_batchbuffer_flush(brw); + } else if (batch_used + sz >= batch->batch.bo->size) { + const unsigned new_size = + MIN2(batch->batch.bo->size + batch->batch.bo->size / 2, + MAX_BATCH_SIZE); + grow_buffer(brw, &batch->batch.bo, &batch->batch.map, + &batch->batch.cpu_map, batch_used, new_size); + batch->map_next = (void *) batch->batch.map + batch_used; + assert(batch_used + sz < batch->batch.bo->size); } /* The intel_batchbuffer_flush() calls above might have changed @@ -437,16 +432,16 @@ if (batch->ring != RENDER_RING) return; - uint32_t *batch_data = brw_bo_map(brw, batch->bo, MAP_READ); - uint32_t *state = brw_bo_map(brw, batch->state_bo, MAP_READ); + uint32_t *batch_data = brw_bo_map(brw, batch->batch.bo, MAP_READ); + uint32_t *state = brw_bo_map(brw, batch->state.bo, MAP_READ); if (batch_data == NULL || state == NULL) { fprintf(stderr, "WARNING: failed to map batchbuffer/statebuffer\n"); return; } uint32_t *end = batch_data + USED_BATCH(*batch); - uint32_t batch_gtt_offset = batch->bo->gtt_offset; - uint32_t state_gtt_offset = batch->state_bo->gtt_offset; + uint32_t batch_gtt_offset = batch->batch.bo->gtt_offset; + uint32_t state_gtt_offset = batch->state.bo->gtt_offset; int length; bool color = 
INTEL_DEBUG & DEBUG_COLOR; @@ -567,8 +562,8 @@ } } - brw_bo_unmap(batch->bo); - brw_bo_unmap(batch->state_bo); + brw_bo_unmap(batch->batch.bo); + brw_bo_unmap(batch->state.bo); } #else static void do_batch_dump(struct brw_context *brw) { } @@ -590,7 +585,7 @@ brw->batch.exec_count = 0; brw->batch.aperture_space = 0; - brw_bo_unreference(brw->batch.state_bo); + brw_bo_unreference(brw->batch.state.bo); /* Create a new batchbuffer and reset the associated state: */ intel_batchbuffer_reset_and_clear_render_cache(brw); @@ -786,18 +781,18 @@ struct intel_batchbuffer *batch = &brw->batch; int ret = 0; - if (batch->batch_cpu_map) { - void *bo_map = brw_bo_map(brw, batch->bo, MAP_WRITE); - memcpy(bo_map, batch->batch_cpu_map, 4 * USED_BATCH(*batch)); + if (batch->batch.cpu_map) { + void *bo_map = brw_bo_map(brw, batch->batch.bo, MAP_WRITE); + memcpy(bo_map, batch->batch.cpu_map, 4 * USED_BATCH(*batch)); } - if (batch->state_cpu_map) { - void *bo_map = brw_bo_map(brw, batch->state_bo, MAP_WRITE); - memcpy(bo_map, batch->state_cpu_map, batch->state_used); + if (batch->state.cpu_map) { + void *bo_map = brw_bo_map(brw, batch->state.bo, MAP_WRITE); + memcpy(bo_map, batch->state.cpu_map, batch->state_used); } - brw_bo_unmap(batch->bo); - brw_bo_unmap(batch->state_bo); + brw_bo_unmap(batch->batch.bo); + brw_bo_unmap(batch->state.bo); if (!brw->screen->no_hw) { /* The requirement for using I915_EXEC_NO_RELOC are: @@ -825,19 +820,19 @@ uint32_t hw_ctx = batch->ring == RENDER_RING ? 
brw->hw_ctx : 0; /* Set statebuffer relocations */ - const unsigned state_index = batch->state_bo->index; + const unsigned state_index = batch->state.bo->index; if (state_index < batch->exec_count && - batch->exec_bos[state_index] == batch->state_bo) { + batch->exec_bos[state_index] == batch->state.bo) { struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[state_index]; - assert(entry->handle == batch->state_bo->gem_handle); + assert(entry->handle == batch->state.bo->gem_handle); entry->relocation_count = batch->state_relocs.reloc_count; entry->relocs_ptr = (uintptr_t) batch->state_relocs.relocs; } /* Set batchbuffer relocations */ struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0]; - assert(entry->handle == batch->bo->gem_handle); + assert(entry->handle == batch->batch.bo->gem_handle); entry->relocation_count = batch->batch_relocs.reloc_count; entry->relocs_ptr = (uintptr_t) batch->batch_relocs.relocs; @@ -899,7 +894,7 @@ intel_upload_finish(brw); if (brw->throttle_batch[0] == NULL) { - brw->throttle_batch[0] = brw->batch.bo; + brw->throttle_batch[0] = brw->batch.batch.bo; brw_bo_reference(brw->throttle_batch[0]); } @@ -921,7 +916,7 @@ if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) { fprintf(stderr, "waiting for idle\n"); - brw_bo_wait_rendering(brw->batch.bo); + brw_bo_wait_rendering(brw->batch.batch.bo); } /* Start a new batch buffer. 
*/ @@ -994,7 +989,7 @@ struct brw_bo *target, uint32_t target_offset, unsigned int reloc_flags) { - assert(batch_offset <= batch->bo->size - sizeof(uint32_t)); + assert(batch_offset <= batch->batch.bo->size - sizeof(uint32_t)); return emit_reloc(batch, &batch->batch_relocs, batch_offset, target, target_offset, reloc_flags); @@ -1005,7 +1000,7 @@ struct brw_bo *target, uint32_t target_offset, unsigned int reloc_flags) { - assert(state_offset <= batch->state_bo->size - sizeof(uint32_t)); + assert(state_offset <= batch->state.bo->size - sizeof(uint32_t)); return emit_reloc(batch, &batch->state_relocs, state_offset, target, target_offset, reloc_flags); @@ -1045,22 +1040,20 @@ { struct intel_batchbuffer *batch = &brw->batch; - assert(size < batch->bo->size); + assert(size < batch->state.bo->size); uint32_t offset = ALIGN(batch->state_used, alignment); - if (offset + size >= STATE_SZ) { - if (!batch->no_wrap) { - intel_batchbuffer_flush(brw); - offset = ALIGN(batch->state_used, alignment); - } else { - const unsigned new_size = - MIN2(batch->state_bo->size + batch->state_bo->size / 2, - MAX_STATE_SIZE); - grow_buffer(brw, &batch->state_bo, &batch->state_map, - &batch->state_cpu_map, batch->state_used, new_size); - assert(offset + size < batch->state_bo->size); - } + if (offset + size >= STATE_SZ && !batch->no_wrap) { + intel_batchbuffer_flush(brw); + offset = ALIGN(batch->state_used, alignment); + } else if (offset + size >= batch->state.bo->size) { + const unsigned new_size = + MIN2(batch->state.bo->size + batch->state.bo->size / 2, + MAX_STATE_SIZE); + grow_buffer(brw, &batch->state.bo, &batch->state.map, + &batch->state.cpu_map, batch->state_used, new_size); + assert(offset + size < batch->state.bo->size); } if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { @@ -1072,7 +1065,7 @@ batch->state_used = offset + size; *out_offset = offset; - return batch->state_map + (offset >> 2); + return batch->state.map + (offset >> 2); } void diff -Nru 
mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/intel_batchbuffer.h mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/intel_batchbuffer.h --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/intel_batchbuffer.h 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/intel_batchbuffer.h 2017-12-12 07:37:17.000000000 +0000 @@ -10,6 +10,15 @@ extern "C" { #endif +/* The kernel assumes batchbuffers are smaller than 256kB. */ +#define MAX_BATCH_SIZE (256 * 1024) + +/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base + * Address, which means that we can't put binding tables beyond 64kB. This + * effectively limits the maximum statebuffer size to 64kB. + */ +#define MAX_STATE_SIZE (64 * 1024) + struct intel_batchbuffer; void intel_batchbuffer_init(struct brw_context *brw); @@ -55,7 +64,8 @@ uint32_t target_offset, unsigned flags); -#define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map)) +#define USED_BATCH(_batch) \ + ((uintptr_t)((_batch).map_next - (_batch).batch.map)) static inline uint32_t float_as_int(float f) { @@ -113,8 +123,8 @@ static inline bool brw_ptr_in_state_buffer(struct intel_batchbuffer *batch, void *p) { - return (char *) p >= (char *) batch->state_map && - (char *) p < (char *) batch->state_map + batch->state_bo->size; + return (char *) p >= (char *) batch->state.map && + (char *) p < (char *) batch->state.map + batch->state.bo->size; } #define BEGIN_BATCH(n) do { \ @@ -131,7 +141,7 @@ #define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f))) #define OUT_RELOC(buf, flags, delta) do { \ - uint32_t __offset = (__map - brw->batch.map) * 4; \ + uint32_t __offset = (__map - brw->batch.batch.map) * 4; \ uint32_t reloc = \ brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \ OUT_BATCH(reloc); \ @@ -139,7 +149,7 @@ /* Handle 48-bit address relocations for Gen8+ */ #define OUT_RELOC64(buf, flags, delta) do { \ - uint32_t 
__offset = (__map - brw->batch.map) * 4; \ + uint32_t __offset = (__map - brw->batch.batch.map) * 4; \ uint64_t reloc64 = \ brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \ OUT_BATCH(reloc64); \ diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/intel_mipmap_tree.c mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/intel_mipmap_tree.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 2017-12-12 07:37:17.000000000 +0000 @@ -207,7 +207,13 @@ if (!brw->mesa_format_supports_render[mt->format]) return false; - return true; + if (devinfo->gen >= 9) { + mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format); + const enum isl_format isl_format = + brw_isl_format_for_mesa_format(linear_format); + return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format); + } else + return true; } static bool @@ -250,7 +256,7 @@ * our HW tends to support more linear formats than sRGB ones, we use this * format variant for check for CCS_E compatibility. */ -static bool +MAYBE_UNUSED static bool format_ccs_e_compat_with_miptree(const struct gen_device_info *devinfo, const struct intel_mipmap_tree *mt, enum isl_format access_format) @@ -284,13 +290,12 @@ if (!intel_miptree_supports_ccs(brw, mt)) return false; - /* Many window system buffers are sRGB even if they are never rendered as - * sRGB. For those, we want CCS_E for when sRGBEncode is false. When the - * surface is used as sRGB, we fall back to CCS_D. + /* Fast clear can be also used to clear srgb surfaces by using equivalent + * linear format. This trick, however, can't be extended to be used with + * lossless compression and therefore a check is needed to see if the format + * really is linear. 
*/ - mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format); - enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format); - return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format); + return _mesa_get_srgb_format_linear(mt->format) == mt->format; } /** @@ -2685,27 +2690,29 @@ return ISL_AUX_USAGE_MCS; case ISL_AUX_USAGE_CCS_D: - return mt->mcs_buf ? ISL_AUX_USAGE_CCS_D : ISL_AUX_USAGE_NONE; - - case ISL_AUX_USAGE_CCS_E: { - /* If the format supports CCS_E and is compatible with the miptree, - * then we can use it. + /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of + * the single-sampled color renderbuffers because the CCS buffer isn't + * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is + * enabled because otherwise the surface state will be programmed with + * the linear equivalent format anyway. */ - if (format_ccs_e_compat_with_miptree(&brw->screen->devinfo, - mt, render_format)) - return ISL_AUX_USAGE_CCS_E; - - /* Otherwise, we have to fall back to CCS_D */ + if (isl_format_is_srgb(render_format) && + _mesa_get_srgb_format_linear(mt->format) != mt->format) { + return ISL_AUX_USAGE_NONE; + } else if (!mt->mcs_buf) { + return ISL_AUX_USAGE_NONE; + } else { + return ISL_AUX_USAGE_CCS_D; + } - /* gen9 hardware technically supports non-0/1 clear colors with sRGB - * formats. However, there are issues with blending where it doesn't - * properly apply the sRGB curve to the clear color when blending. + case ISL_AUX_USAGE_CCS_E: { + /* Lossless compression is not supported for SRGB formats, it + * should be impossible to get here with such surfaces. 
*/ - if (blend_enabled && isl_format_is_srgb(render_format) && - !isl_color_value_is_zero_one(mt->fast_clear_color, render_format)) - return ISL_AUX_USAGE_NONE; + assert(!isl_format_is_srgb(render_format) || + _mesa_get_srgb_format_linear(mt->format) == mt->format); - return ISL_AUX_USAGE_CCS_D; + return ISL_AUX_USAGE_CCS_E; } default: diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/intel_screen.c mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/intel_screen.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/drivers/dri/i965/intel_screen.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/drivers/dri/i965/intel_screen.c 2017-12-12 07:37:17.000000000 +0000 @@ -774,7 +774,7 @@ *value = image->pitch; return true; case __DRI_IMAGE_ATTRIB_HANDLE: - *value = image->bo->gem_handle; + *value = brw_bo_export_gem_handle(image->bo); return true; case __DRI_IMAGE_ATTRIB_NAME: return !brw_bo_flink(image->bo, (uint32_t *) value); diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/main/teximage.c mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/main/teximage.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/main/teximage.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/main/teximage.c 2017-12-12 07:37:18.000000000 +0000 @@ -122,6 +122,56 @@ return format; } +/** + * Returns a corresponding base format for a given internal floating point + * format as specifed by OES_texture_float. 
+ */ +static GLenum +oes_float_internal_format(const struct gl_context *ctx, + GLenum format, GLenum type) +{ + switch (type) { + case GL_FLOAT: + if (ctx->Extensions.OES_texture_float) { + switch (format) { + case GL_RGBA32F: + return GL_RGBA; + case GL_RGB32F: + return GL_RGB; + case GL_ALPHA32F_ARB: + return GL_ALPHA; + case GL_LUMINANCE32F_ARB: + return GL_LUMINANCE; + case GL_LUMINANCE_ALPHA32F_ARB: + return GL_LUMINANCE_ALPHA; + default: + break; + } + } + break; + + case GL_HALF_FLOAT_OES: + if (ctx->Extensions.OES_texture_half_float) { + switch (format) { + case GL_RGBA16F: + return GL_RGBA; + case GL_RGB16F: + return GL_RGB; + case GL_ALPHA16F_ARB: + return GL_ALPHA; + case GL_LUMINANCE16F_ARB: + return GL_LUMINANCE; + case GL_LUMINANCE_ALPHA16F_ARB: + return GL_LUMINANCE_ALPHA; + default: + break; + } + } + break; + } + return format; +} + /** * Install gl_texture_image in a gl_texture_object according to the target @@ -2155,6 +2205,10 @@ return GL_TRUE; } + GLenum internalFormat = _mesa_is_gles(ctx) ? + oes_float_internal_format(ctx, texImage->InternalFormat, type) : + texImage->InternalFormat; + /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the * combinations of format, internalFormat, and type that can be used. 
* Formats and types that require additional extensions (e.g., GL_FLOAT @@ -2162,7 +2216,7 @@ */ if (_mesa_is_gles(ctx) && texture_format_error_check_gles(ctx, format, type, - texImage->InternalFormat, + internalFormat, dimensions, callerName)) { return GL_TRUE; } diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/state_tracker/st_glsl_to_tgsi.cpp mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/state_tracker/st_glsl_to_tgsi.cpp --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 2017-12-12 07:37:18.000000000 +0000 @@ -5222,7 +5222,7 @@ defined = 0; inst2 = (glsl_to_tgsi_instruction *) inst->next; - do { + while (!inst2->is_tail_sentinel()) { if (inst->op == inst2->op && inst2->dst[defined].file == PROGRAM_UNDEFINED && inst->src[0].file == inst2->src[0].file && @@ -5231,9 +5231,9 @@ inst->src[0].swizzle == inst2->src[0].swizzle) break; inst2 = (glsl_to_tgsi_instruction *) inst2->next; - } while (inst2); + } - if (!inst2) { + if (inst2->is_tail_sentinel()) { /* Undefined destinations are not allowed, substitute with an unused * temporary register. 
*/ diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/util/disk_cache.c mesa-17.3.0~git20171212+17.3.49a612d1/src/util/disk_cache.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/util/disk_cache.c 2017-11-21 10:19:32.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/util/disk_cache.c 2017-12-12 07:37:18.000000000 +0000 @@ -1170,7 +1170,7 @@ disk_cache_put_key(struct disk_cache *cache, const cache_key key) { const uint32_t *key_chunk = (const uint32_t *) key; - int i = *key_chunk & CACHE_INDEX_KEY_MASK; + int i = CPU_TO_LE32(*key_chunk) & CACHE_INDEX_KEY_MASK; unsigned char *entry; entry = &cache->stored_keys[i * CACHE_KEY_SIZE]; @@ -1189,7 +1189,7 @@ disk_cache_has_key(struct disk_cache *cache, const cache_key key) { const uint32_t *key_chunk = (const uint32_t *) key; - int i = *key_chunk & CACHE_INDEX_KEY_MASK; + int i = CPU_TO_LE32(*key_chunk) & CACHE_INDEX_KEY_MASK; unsigned char *entry; entry = &cache->stored_keys[i * CACHE_KEY_SIZE]; diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/src/util/sha1/sha1.c mesa-17.3.0~git20171212+17.3.49a612d1/src/util/sha1/sha1.c --- mesa-17.3.0~git20171121+17.3.d1e6cf46/src/util/sha1/sha1.c 2017-02-12 09:42:57.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/src/util/sha1/sha1.c 2017-12-12 07:37:18.000000000 +0000 @@ -16,6 +16,7 @@ #include #include +#include "u_endian.h" #include "sha1.h" #define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) @@ -24,7 +25,7 @@ * blk0() and blk() perform the initial expand. 
* I got the idea of expanding during the round function from SSLeay */ -#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef PIPE_ARCH_LITTLE_ENDIAN # define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \ |(rol(block->l[i],8)&0x00FF00FF)) #else diff -Nru mesa-17.3.0~git20171121+17.3.d1e6cf46/VERSION mesa-17.3.0~git20171212+17.3.49a612d1/VERSION --- mesa-17.3.0~git20171121+17.3.d1e6cf46/VERSION 2017-11-21 10:19:31.000000000 +0000 +++ mesa-17.3.0~git20171212+17.3.49a612d1/VERSION 2017-12-12 07:37:17.000000000 +0000 @@ -1 +1 @@ -17.3.0-rc5 +17.3.0