diff -Nru mesa-10.6.0~git20150310.5750595c/autogen.sh mesa-10.6.0~git20150318.27bf37ba/autogen.sh --- mesa-10.6.0~git20150310.5750595c/autogen.sh 2012-08-30 05:23:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/autogen.sh 2015-03-18 08:43:34.000000000 +0000 @@ -6,8 +6,8 @@ ORIGDIR=`pwd` cd "$srcdir" -autoreconf -v --install || exit 1 -cd $ORIGDIR || exit $? +autoreconf --force --verbose --install || exit 1 +cd "$ORIGDIR" || exit $? if test -z "$NOCONFIGURE"; then "$srcdir"/configure "$@" diff -Nru mesa-10.6.0~git20150310.5750595c/ChangeLog mesa-10.6.0~git20150318.27bf37ba/ChangeLog --- mesa-10.6.0~git20150310.5750595c/ChangeLog 2015-03-10 16:53:27.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/ChangeLog 2015-03-18 08:44:53.000000000 +0000 @@ -1,9 +1,1837 @@ -commit 68ceeb9b8b083d9cba4d243c765a8582d9413847 +commit f27c13d1a51df265dc611ab1910d12024fa4a1e2 Author: Rico Tzschichholz -Date: Tue Mar 10 17:52:50 2015 +0100 +Date: Wed Mar 18 09:43:35 2015 +0100 Add debian tree from origin/ubuntu +commit 27bf37ba05b69ebf6f373d1637a26b4839265921 +Author: Jason Ekstrand +Date: Mon Mar 16 15:08:04 2015 -0700 + + nir/peephole_select: Allow uniform/input loads and load_const + + Shader-db results on HSW: + + total instructions in shared programs: 4174156 -> 4157291 (-0.40%) + instructions in affected programs: 145397 -> 128532 (-11.60%) + helped: 383 + HURT: 0 + GAINED: 20 + LOST: 22 + + There are two more tests lost than gained. However, comparing this with + GLSL IR vs. NIR results, the overall delta is reduced from 85/44 + gained/lost on current master to 71/32 with this commit. Therefore, I + think it's probably a boon since we are getting "closer" to where we were + before. 
+ + Reviewed-by: Connor Abbott + +commit 1be862c0c4965a0184908df736a30d354498ba3d +Author: Jason Ekstrand +Date: Mon Mar 16 14:55:00 2015 -0700 + + nir/peephole_select: Copy instructions into the block before the if + + Previously we tried to do poor-man's copy propagation as we created the + select instructions. Instead, this commit just moves the instructions from + the blocks inside the if into the block before. Copy propagation will take + care of making sure we don't have any extra mov's in there for us. + + Reviewed-by: Connor Abbott + +commit 8cf40ed05dbd3a62ee817e7ebc9409cf327c10ce +Author: Jason Ekstrand +Date: Mon Mar 16 14:45:54 2015 -0700 + + nir/peephole_select: Rename are_all_move_to_phi and use a switch + + Reviewed-by: Connor Abbott + +commit cc5ddd584d17abd422ae4d8e83805969485740d9 +Author: Mario Kleiner +Date: Thu Mar 12 20:34:06 2015 +0100 + + glx: Handle out-of-sequence swap completion events correctly. (v2) + + The code for emitting INTEL_swap_events swap completion + events needs to translate from 32-Bit sbc on the wire to + 64-Bit sbc for the events and handle wraparound accordingly. + + It assumed that events would be sent by the server in the + order their corresponding swap requests were emitted from + the client, iow. sbc count should be always increasing. This + was correct for DRI2. + + This is not always the case under the DRI3/Present backend, + where the Present extension can execute presents and send out + completion events in a different order than the submission + order of the present requests, due to client code specifying + targetMSC target vblank counts which are not strictly + monotonically increasing. This confused the wraparound + handling. This patch fixes the problem by handling 32-Bit + wraparound in both directions. As long as successive swap + completion events real 64-Bit sbc's don't differ by more + than 2^30, this should be able to do the right thing. 
+ + How this is supposed to work: + + awire->sbc contains the low 32-Bits of the true 64-Bit sbc + of the current swap event, transmitted over the wire. + + glxDraw->lastEventSbc contains the low 32-Bits of the 64-Bit + sbc of the most recently processed swap event. + + glxDraw->eventSbcWrap is a 64-Bit offset which tracks the upper + 32-Bits of the current sbc. The final 64-Bit output sbc + aevent->sbc is computed from the sum of awire->sbc and + glxDraw->eventSbcWrap. + + Under DRI3/Present, swap completion events can be received + slightly out of order due to non-monotonic targetMsc specified + by client code, e.g., present request submission: + + Submission sbc: 1 2 3 + targetMsc: 10 11 9 + + Reception of completion events: + Completion sbc: 3 1 2 + + The completion sequence 3, 1, 2 would confuse the old wraparound + handling made for DRI2 as 1 < 3 --> Assumes a 32-Bit wraparound + has happened when it hasn't. + + The client can queue multiple present requests, in the case of + Mesa up to n requests for n-buffered rendering, e.g., n = 2-4 in + the current Mesa GLX DRI3/Present implementation. In the case of + direct Pixmap presents via xcb_present_pixmap() the number n is + limited by the amount of memory available. + + We reasonably assume that the number of outstanding requests n is + much less than 2 billion due to memory constraints and common sense. + Therefore while the order of received sbc's can be a bit scrambled, + successive 64-Bit sbc's won't deviate by much, a given sbc may be + a few counts lower or higher than the previous received sbc. + + Therefore any large difference between the incoming awire->sbc and + the last recorded glxDraw->lastEventSbc will be due to 32-Bit + wraparound and we need to adapt glxDraw->eventSbcWrap accordingly + to adjust the upper 32-Bits of the sbc. 
+ + Two cases, corresponding to the two if-statements in the patch: + + a) Previous sbc event was below the last 2^32 boundary, in the previous + glxDraw->eventSbcWrap epoch, the new sbc event is in the next 2^32 + epoch, therefore the low 32-Bit awire->sbc wrapped around to zero, + or close to zero --> awire->sbc is apparently much lower than the + glxDraw->lastEventSbc recorded for the previous epoch + + --> We need to increment glxDraw->eventSbcWrap by 2^32 to adjust + the current epoch to be one higher than the previous one. + + --> Case a) also handles the old DRI2 behaviour. + + b) Previous sbc event was above closest 2^32 boundary, but now a + late event from the previous 2^32 epoch arrives, with a true sbc + that belongs to the previous 2^32 segment, so the awire->sbc of + this late event has a high count close to 2^32, whereas + glxDraw->lastEventSbc is closer to zero --> awire->sbc is much + greater than glxDraw->lastEventSbc. + + --> We need to decrement glxDraw->eventSbcWrap by 2^32 to adjust + the current epoch back to the previous lower epoch of this late + completion event. + + We assume such a wraparound to a higher (a) epoch or lower (b) + epoch has happened if awire->sbc and glxDraw->lastEventSbc differ + by more than 2^30 counts, as such a difference can only happen + on wraparound, or if somehow 2^30 present requests would be pending + for a given drawable inside the server, which is rather unlikely. + + v2: Explain the reason for this patch and the new wraparound handling + much more extensively in commit message, no code change wrt. initial + version. + + Cc: "10.3 10.4 10.5" + Signed-off-by: Mario Kleiner + Reviewed-by: Michel Dänzer + +commit 3f94a5afcbab5ebcc4229cb3c65140ad5b5dafca +Author: Emil Velikov +Date: Mon Mar 16 14:47:09 2015 +0000 + + r600g: constify r600_shader_tgsi_instruction lists. + + Massive list of constant data. Annotate it as such. 
+ + Signed-off-by: Emil Velikov + Reviewed-by: Marek Olšák + +commit 63cf2b4448f96c8b69c11dac14d8c55742dc6918 +Author: Emil Velikov +Date: Mon Mar 16 14:47:08 2015 +0000 + + r600g: kill off r600_shader_tgsi_instruction::{tgsi_opcode,is_op3} + + Both of which are no longer used. Use designated initializer to make + things obvious as people add/remove TGSI_OPCODEs. + + Signed-off-by: Emil Velikov + Reviewed-by: Marek Olšák + +commit 5e68c6b32284a5d8d65cf87359f321fc135bdc1c +Author: Emil Velikov +Date: Mon Mar 16 14:47:07 2015 +0000 + + r600g: use the tgsi opcode from parse.FullToken.FullInstruction + + ... rather than the local one in inst_info->tgsi_opcode. + + This will allow us to simplify struct r600_shader_tgsi_instruction. + + Signed-off-by: Emil Velikov + Reviewed-by: Marek Olšák + +commit 6db5e134b627d24e3d6d42a6835e6595652c5aab +Author: Ian Romanick +Date: Sat Feb 28 08:32:57 2015 -0800 + + i965/fs: Apply gl_FrontFacing ? -1 : 1 optimization only for floats + + At the very least, unreal4/sun-temple/102.shader_test uses this pattern + for a signed integer result. However, that shader did not hit the + optimization in the first place because it uses !gl_FrontFacing. I + changed the shader to use remove the logical-not and reverse the other + operands. I verified that incorrect code is generated before this + change and correct code is generated after. + + Fixes fs-frontfacing-ternary-1-neg-1.shader_test. + + No shader-db changes. + + Signed-off-by: Ian Romanick + Reviewed-by: Matt Turner + +commit 4a53445b0d199489b2d1ae7d8654791e42b16804 +Author: Ian Romanick +Date: Sat Feb 28 08:26:37 2015 -0800 + + i965/fs: Change try_opt_frontfacing_ternary to eliminate asserts + + If we check for the case that is actually necessary, the asserts + become superfluous. + + Signed-off-by: Ian Romanick + Reviewed-by: Matt Turner + +commit ce3f46397d77141156f81dd7fcf06fb936e2b0ef +Author: Ian Romanick +Date: Tue Feb 3 21:12:28 2015 +0200 + + i965/fs: Handle CMP.nz ... 
0 and AND.nz ... 1 similarly in cmod propagation + + Especially on platforms that do not natively generate 0u and ~0u for + Boolean results, we generate a lot of sequences where a CMP is + followed by an AND with 1. emit_bool_to_cond_code does this, for + example. On ILK, this results in a sequence like: + + add(8) g3<1>F g8<8,8,1>F -g4<0,1,0>F + cmp.l.f0(8) g3<1>D g3<8,8,1>F 0F + and.nz.f0(8) null g3<8,8,1>D 1D + (+f0) iff(8) Jump: 6 + + The AND.nz is obviously redundant. By propagating the cmod, we can + instead generate + + add.l.f0(8) null g8<8,8,1>F -g4<0,1,0>F + (+f0) iff(8) Jump: 6 + + Existing code already handles the propagation from the CMP to the ADD. + + Shader-db results: + + GM45 (0x2A42): + total instructions in shared programs: 3550829 -> 3550788 (-0.00%) + instructions in affected programs: 10028 -> 9987 (-0.41%) + helped: 24 + + Iron Lake (0x0046): + total instructions in shared programs: 4993146 -> 4993105 (-0.00%) + instructions in affected programs: 9675 -> 9634 (-0.42%) + helped: 24 + + Ivy Bridge (0x0166): + total instructions in shared programs: 6291870 -> 6291794 (-0.00%) + instructions in affected programs: 17914 -> 17838 (-0.42%) + helped: 48 + + Haswell (0x0426): + total instructions in shared programs: 5779256 -> 5779180 (-0.00%) + instructions in affected programs: 16694 -> 16618 (-0.46%) + helped: 48 + + Broadwell (0x162E): + total instructions in shared programs: 6823088 -> 6823014 (-0.00%) + instructions in affected programs: 15824 -> 15750 (-0.47%) + helped: 46 + + No change on Sandy Bridge or on any platform when NIR is used. + + v2: Add unit tests suggested by Matt. Remove spurious writes_flag() + check on scan_inst when scan_inst is known to be BRW_OPCODE_CMP (also + suggested by Matt). + + v3: Fix some comments and remove some explicit int() casts in fs_reg + constructors in the unit tests. Both suggested by Matt. 
+ + Signed-off-by: Ian Romanick + Reviewed-by: Matt Turner + +commit d35720da9b9824d104532028775e497491f433ad +Author: Matt Turner +Date: Wed Mar 4 17:27:21 2015 -0800 + + i965: Mark paths in linear <-> tiled functions as unreachable(). + + text data bss dec hex filename + 9663 0 0 9663 25bf intel_tiled_memcpy.o before + 8215 0 0 8215 2017 intel_tiled_memcpy.o after + + Reviewed-by: Carl Worth + Reviewed-by: Jason Ekstrand + +commit 6c6e2a15aa7e8c0fd9a1180a901389c1692992c3 +Author: Matt Turner +Date: Fri Mar 13 17:00:26 2015 -0700 + + egl: Remove eglQueryString virtual dispatch. + + Reviewed-by: Chad Versace + +commit 827da841a1b3dbd4252c39be99965710c5085f5a +Author: Laura Ekstrand +Date: Tue Mar 17 13:27:31 2015 -0700 + + main: Correct _mesa_error with no format in bufferobj.c. + + This fixes Bug 89616, a build failure due to line 1639 of bufferobj.c: + _mesa_error(ctx, GL_INVALID_OPERATION, func); + + Trivial. + +commit 579297c8bdffd92f47a8cc02100b9535822d2ae7 +Author: Laura Ekstrand +Date: Wed Feb 11 16:53:46 2015 -0800 + + main: Cosmetic changes to GetBufferSubData. + + Reviewed-by: Fredrik Höglund + +commit 23eab47bbe998b95d5da889b85b7b0ca6e14385b +Author: Laura Ekstrand +Date: Tue Jan 20 15:24:53 2015 -0800 + + main: Add entry point for GetNamedBufferSubData. + + Reviewed-by: Fredrik Höglund + +commit 3706ace2446825b9544e45800c0ce1df261a1c30 +Author: Laura Ekstrand +Date: Mon Mar 16 16:08:36 2015 -0700 + + main: Cosmetic updates to GetBufferPointerv. + + v3: Review from Fredrik Hoglund + -Split cosmetic refactor of GetBufferPointerv out into a separate commit + + Reviewed-by: Fredrik Höglund + +commit 105ddc6aea397bd5d39b8ffcd25278ed12102e3c +Author: Laura Ekstrand +Date: Tue Jan 20 14:32:35 2015 -0800 + + main: Add entry point for GetNamedBufferPointerv. 
+ + v3: Review from Fredrik Hoglund + -Split cosmetic refactor of GetBufferPointerv out into a separate commit + + Reviewed-by: Fredrik Höglund + +commit 1e45752aaf4ac7d2324d71bda4d2ac34f3abf8bd +Author: Laura Ekstrand +Date: Wed Feb 11 16:10:20 2015 -0800 + + main: Add entry points for GetNamedBufferParameteri[64]v. + + Reviewed-by: Fredrik Höglund + +commit efcb830d49d601140a62a096a4ff4c215e68d89c +Author: Laura Ekstrand +Date: Wed Feb 11 16:07:45 2015 -0800 + + main: Refactor GetBufferParameteri[64]v. + + v2: Split into a refactor commit and an entry point commit. + + Reviewed-by: Fredrik Höglund + +commit 1cfc18da8d3220fd6b123b6e269b3b440988027b +Author: Laura Ekstrand +Date: Wed Feb 11 16:06:52 2015 -0800 + + main: Add entry point for FlushMappedNamedBufferRange. + + Reviewed-by: Fredrik Höglund + +commit ee5fae6e897a38f5104859851eb8fba84180cfa8 +Author: Laura Ekstrand +Date: Wed Jan 14 17:01:20 2015 -0800 + + main: Refactor FlushMappedBufferRange. + + v2:-Remove "_mesa" from in front of static software fallback. + -Split out the refactor from the addition of the DSA entry points. + + Reviewed-by: Fredrik Höglund + +commit f7f5df99542d6492fffd803d77d5f7d2f44d08c9 +Author: Laura Ekstrand +Date: Wed Jan 14 14:52:01 2015 -0800 + + main: Add entry point for UnmapNamedBuffer. + + v2: review from Ian Romanick + - Restore VBO_DEBUG and BOUNDS_CHECK + - Remove _mesa from static software fallback unmap_buffer. + + Reviewed-by: Fredrik Höglund + +commit a0cc03929e754692ae593df5072d144460434297 +Author: Laura Ekstrand +Date: Wed Feb 11 14:09:52 2015 -0800 + + main: Add entry points for MapNamedBuffer[Range]. + + Reviewed-by: Fredrik Höglund + +commit 4f513bc330393c4615b4bad98e3e634408123960 +Author: Laura Ekstrand +Date: Wed Jan 14 12:44:39 2015 -0800 + + main: Refactor MapBuffer[Range]. + + v2: review from Jason Ekstrand + - Split refactor from addition of DSA entry points. 
+ review from Ian Romanick + - Remove "_mesa" from static software fallback map_buffer_range + - Restore VBO_DEBUG and BOUNDS_CHECK + + Reviewed-by: Fredrik Höglund + +commit 16244525fbe09ff41074eb36a435875892e316a9 +Author: Laura Ekstrand +Date: Wed Feb 11 11:45:57 2015 -0800 + + main: Minor whitespace fixes in ClearNamedBuffer[Sub]Data. + + Reviewed-by: Fredrik Höglund + +commit 5030d0a4f79c3309bad04cc257beb97f74f84f61 +Author: Laura Ekstrand +Date: Wed Feb 11 12:17:38 2015 -0800 + + main: Add entry points for ClearNamedBuffer[Sub]Data. + + Reviewed-by: Martin Peres + +commit 9fa6c3637a53603bc92db8a97b71cf35d88e5176 +Author: Laura Ekstrand +Date: Tue Jan 13 15:20:19 2015 -0800 + + main: Refactor ClearBuffer[Sub]Data. + + v2: review by Jason Ekstrand + - Split refactor of clear buffer sub data from addition of DSA entry + points. + + Reviewed-by: Martin Peres + +commit 4adaad5fcc7fc959f24f807e783b6be2903e08a3 +Author: Laura Ekstrand +Date: Tue Jan 13 13:28:08 2015 -0800 + + main: Add entry point for CopyNamedBufferSubData. + + v2: remove _mesa in front of static software fallback. + + Reviewed-by: Martin Peres + +commit 9cb732b8e9fad4a603f38ce896cd84300e2743a3 +Author: Laura Ekstrand +Date: Wed Feb 11 11:06:42 2015 -0800 + + main: Improve errors and style in BufferSubData. + + - More explicit error reporting. + - Removed legacy style. + + Reviewed-by: Martin Peres + +commit 566ccdf11b37363255bf5d20d7ab6639ddaf1b30 +Author: Laura Ekstrand +Date: Tue Jan 13 11:28:17 2015 -0800 + + main: Add entry point for NamedBufferSubData. + + v2: review by Ian Romanick + - Remove "_mesa" from name of static software fallback buffer_sub_data. + - Remove mappedRange from _mesa_buffer_sub_data. + - Removed some cosmetic changes to a separate commit. + + Reviewed-by: Martin Peres + +commit cb56835f870de01ed9c638d1470af38775bb6f72 +Author: Laura Ekstrand +Date: Mon Feb 9 17:57:46 2015 -0800 + + main: Add entry point for NamedBufferData. 
+ + v2: review from Ian Romanick + - Fix space in ARB_direct_state_access.xml. + - Remove "_mesa" from the name of buffer_data static fallback. + - Restore VBO_DEBUG and BOUNDS_CHECK. + - Fix beginning of comment to start on same line as /* + + Reviewed-by: Martin Peres + +commit a76808dc19580855eb39c0904f3254edd765aa50 +Author: Laura Ekstrand +Date: Fri Jan 9 16:17:10 2015 -0800 + + main: Add entry point for NamedBufferStorage. + + Reviewed-by: Martin Peres + +commit 2cf48c37c1e2946f7c0648e0a5927a90209f59a4 +Author: Laura Ekstrand +Date: Thu Dec 18 17:10:06 2014 -0800 + + main: Add entry point for CreateBuffers. + + Reviewed-by: Martin Peres + +commit 44ecf0793d872e771edc448436f7a2fd7c3390f5 +Author: Laura Ekstrand +Date: Tue Mar 17 09:43:52 2015 -0700 + + Revert "main: _mesa_cube_level_complete checks NumLayers." + + This reverts commit 1ee000a0b6737d6c140d4f07b6044908b8ebfdc7. + Failures with the GLES3 conformance suite and Synmark2 OGLHdrBloom revealed + that this commit was in error. + + Extensive testing with Piglit prior to patch review and upstreaming did not + reveal this problem because, in the few Piglit tests that test for cube + completeness, NumLayers = 6. This is because all of the existing tests use + TextureStorage to initialize the texture, which sets NumLayers. + + A new Piglit test has been sent to the mailing list that reproduces the bug + related to this patch ("texturing: Testing + glGenerateMipmap(GL_TEXTURE_CUBE_MAP) without glTexStorage2D"). + + Reviewed-by: Jason Ekstrand + +commit 5a06ee7384934f8b5177b2f01bb7dff08b370145 +Author: Neil Roberts +Date: Thu Mar 12 17:41:07 2015 +0000 + + i965/skl: Send a message header when doing constant loads SIMD4x2 + + Commit 0ac4c272755c7 made it add a header for the send message when + using SIMD4x2 on Skylake because without this it will end up using + SIMD8D. 
However the patch missed the case when a sampler is being used + to implement constant loads from a buffer surface in a SIMD4x2 vertex + shader. + + This fixes 29 Piglit tests, mostly related to the ARL instruction in + vertex programs. + + Reviewed-by: Kristian Høgsberg + Tested-by: Anuj Phogat + +commit 627c68308683abbd6e563a09af6013a33938a790 +Author: Tapani Pälli +Date: Mon Mar 16 10:08:08 2015 +0200 + + i965/fs: in MAD optimizations, switch last argument to be immediate + + Commit bb33a31 introduced optimizations that transform cases of MAD + in to simpler forms but it did not take in to account that src[0] + can not be immediate and did not report progress. Patch switches + src[0] and src[1] if src[0] is immediate and adds progress + reporting. If both sources are immediates, this is taken care of by + the same opt_algebraic pass on later run. + + v2: Fix for all cases, use temporary fs_reg (Matt, Kenneth) + + Signed-off-by: Tapani Pälli + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89569 + Reviewed-by: Francisco Jerez (v1) + Reviewed-by: Kenneth Graunke + Cc: "10.5" + +commit 60f77b22b1e3bbef7e4d1f10012acf830d81ed7b +Author: Vinson Lee +Date: Sat Mar 14 01:45:03 2015 -0700 + + common.py: Fix PEP 8 issues. + + Signed-off-by: Vinson Lee + Reviewed-by: Brian Paul + +commit 2372275d2fc9e4d3785b34a5aeb7b6126e8cc402 +Author: Roland Scheidegger +Date: Fri Mar 13 23:45:20 2015 +0100 + + gallivm: abort properly when running out of buffer space in lp_disassembly + + Before this actually ran into an infinite loop printing out "invalid"... 
+ + Reviewed-by: Brian Paul + Reviewed-by: Jose Fonseca + +commit 9d1682d619426d0a5b90a12df82390cdfa098107 +Author: Marek Olšák +Date: Mon Mar 16 23:24:15 2015 +0100 + + docs/GL3: also mark GLES3/GS5 for radeonsi as done + +commit c066669b8df9fa5a5e87354b613988944c9a0e5c +Author: Emil Velikov +Date: Mon Mar 16 15:00:19 2015 +0000 + + st/dri: remove unused include from the automake/scons build + + st/dri/common hasn't been around for a while. + + Signed-off-by: Emil Velikov + Reviewed-by: Brian Paul + +commit 55f0c0a29f788c5df4820e81c0cf93613ccedf5e +Author: Emil Velikov +Date: Mon Mar 16 15:00:18 2015 +0000 + + auxiliary/os: fix the android build - s/drm_munmap/os_munmap/ + + Squash this silly typo introduced with commit c63eb5dd5ec(auxiliary/os: get + the mmap/munmap wrappers working with android) + + Cc: "10.4 10.5" + Signed-off-by: Emil Velikov + Reviewed-by: Brian Paul + +commit 5664f57df3b7dfc5def189d1ee7a1b3df7d92bd6 +Author: Emil Velikov +Date: Mon Mar 16 11:50:47 2015 +0000 + + gallium/sw/kms: trivial cleanups + + Remove the forward declaration and make use of the DEBUG_PRINT macro for + debug builds. + + Signed-off-by: Emil Velikov + Reviewed-by: Brian Paul + +commit 771cd266b9d00bdcf2cf7acaa3c8363c358d7478 +Author: Emil Velikov +Date: Wed Mar 11 19:12:35 2015 +0000 + + loader: include for non-sysfs builds + + Required by fstat(), otherwise we'll error out due to implicit function + declaration. + + Cc: "10.4 10.5" + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89530 + Signed-off-by: Emil Velikov + Reported-by: Vadim Rutkovsky + Tested-by: Vadim Rutkovsky + +commit aead7fe2e2b6c89258f80a25299f4ec0fece2d95 +Author: Felix Janda +Date: Mon Feb 2 20:04:16 2015 +0100 + + c11/threads: Use PTHREAD_MUTEX_RECURSIVE by default + + Previously PTHREAD_MUTEX_RECURSIVE_NP had been used on linux for + compatibility with old glibc. Since mesa defines __GNU_SOURCE__ + on linux PTHREAD_MUTEX_RECURSIVE is also available since at least + 1998. 
So we can unconditionally use the portable version + PTHREAD_MUTEX_RECURSIVE. + + Cc: "10.5" + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88534 + Reviewed-by: Emil Velikov + +commit b5f19db9766ac54d78b8087b0433011f908ebd2c +Author: Marek Olšák +Date: Sat Feb 28 14:31:45 2015 +0100 + + radeonsi: implement TGSI_OPCODE_BFI (v2) + + v2: Don't use the intrinsics, the shader backend can recognize these + patterns and generates optimal code automatically. + + Reviewed-by: Tom Stellard + +commit d3723c614fb42c22e4e87fe8151bbb36462b425a +Author: Marek Olšák +Date: Fri Feb 27 19:09:30 2015 +0100 + + radeonsi: add a helper for extracting bitfields from parameters (v2) + + This will be used a lot (especially by tessellation). + + v2: don't use the bfe intrinsic + + Reviewed-by: Tom Stellard + +commit 9735a62a2c6007e7ee7baa5a769575a0adb5fda3 +Author: Antia Puentes +Date: Thu Mar 12 13:59:17 2015 +0100 + + i965: Emit IF/ELSE/ENDIF/WHILE JIP with type W on Gen7 + + IvyBridge and Haswell PRM say that the JIP should be emitted + with type W but we were using UD. The previous implementation + did not show adverse effects, but IMHO it is safer to follow + the specification thoroughly. + + Reviewed-by: Matt Turner + Signed-off-by: Antia Puentes + +commit dc394136404eafec689874934db0198be6182c59 +Author: Marek Olšák +Date: Sun Mar 15 20:13:52 2015 +0100 + + radeonsi: move scratch reloc state setup + + - move it to its own function + - do it after all states are emitted + - bump SI_MAX_DRAW_CS_DWORDS + + Reviewed-by: Michel Dänzer + +commit 567c8d73008a672cb71a84a4724829d34e1652b2 +Author: Marek Olšák +Date: Sun Mar 15 19:24:13 2015 +0100 + + radeonsi: don't emit PA_SC_LINE_STIPPLE if not rendering lines + + Reviewed-by: Michel Dänzer + +commit 1f4bb3826464e2ce1d3f47183c96e6e7fde9a1d7 +Author: Marek Olšák +Date: Sun Mar 15 19:21:31 2015 +0100 + + radeonsi: don't emit PA_SC_LINE_STIPPLE after every rasterizer state change + + Do it only when the line stipple state is changed. 
+ + Reviewed-by: Michel Dänzer + +commit f5832f3f9dd0ac0b401d351acab19425fe3c1187 +Author: Marek Olšák +Date: Sun Mar 15 18:53:50 2015 +0100 + + radeonsi: move PA_SU_SC_MODE_CNTL to rasterizer state + + This requires enabling the optional GL provoking vertex behavior for quads. + + + some cosmetic changes, so that the register is set exactly the same as + on r600. + + Reviewed-by: Michel Dänzer + +commit 98a23982227dce29b015dcb5a867d05f2bee4388 +Author: Marek Olšák +Date: Sun Mar 15 18:20:19 2015 +0100 + + radeonsi: implement line and polygon smoothing + + Reviewed-by: Michel Dänzer + +commit 303d23e10d2caad69b2d122f45c78fee2906fc09 +Author: Marek Olšák +Date: Sun Mar 15 18:11:19 2015 +0100 + + radeonsi: add shader code for smoothing + + The fragment shader multiplies the alpha channel with gl_SampleMaskIn. + If blending is enabled, it looks like MSAA. + + Reviewed-by: Michel Dänzer + +commit 4f20a8f278aa92fb0dc6abc6998171b3ddea7dc1 +Author: Marek Olšák +Date: Sun Mar 15 17:54:29 2015 +0100 + + radeonsi: split sample locations into its own state atom + + Sample locations are not updated as often as framebuffers. + + Reviewed-by: Michel Dänzer + +commit f7796a966d20b04c00025bdc170883f4179a5697 +Author: Marek Olšák +Date: Sun Mar 15 17:14:53 2015 +0100 + + radeonsi: add basic code for overrasterization + + This will be used for line and polygon smoothing. + This is GCN-only even though it's in shared code. 
+ + Reviewed-by: Michel Dänzer + +commit 1921fa430452304e42059e36b654d9d446371526 +Author: Marek Olšák +Date: Sat Feb 28 17:22:54 2015 +0100 + + radeonsi: small cleanup in si_shader_selector_key + + Reviewed-by: Michel Dänzer + +commit 52ff1edc5161dd7090fa55a11969c79dd4baad7d +Author: Marek Olšák +Date: Sat Feb 28 17:16:57 2015 +0100 + + radeonsi: simplify accessing alpha pointer in si_llvm_emit_fs_epilogue + + Reviewed-by: Michel Dänzer + +commit 955ebf2890f18973a128ef3a6a6cfe4416fabaef +Author: Marek Olšák +Date: Fri Mar 13 16:21:11 2015 +0100 + + radeonsi: add support for easy opcodes from ARB_gpu_shader5 + + I have to use the BFE intrinsics, because BFE is one of the most complex + instructions that can't be matched easily. BFE has 3 conditional branches + and one of them is quite big. + + In the isel DAG, lowered BFE has 27 nodes (including leafs). + +commit 755a2907a3e7f896f86861254554543d815bfad3 +Author: Marek Olšák +Date: Sat Feb 28 14:01:43 2015 +0100 + + radeonsi: implement bit-finding opcodes from ARB_gpu_shader5 + + Reviewed-by: Glenn Kennard + +commit ca90cde81eb48a50286193c6bbef9ef47c70a0c6 +Author: Marek Olšák +Date: Sat Feb 28 00:30:26 2015 +0100 + + radeonsi: implement gl_SampleMaskIn + + Reviewed-by: Glenn Kennard + +commit f9fd0c4a55afd97fd34d0e846000c75f5f6ecac2 +Author: Marek Olšák +Date: Mon Mar 2 02:40:57 2015 +0100 + + radeonsi: add support for SQRT + + Reviewed-by: Tom Stellard + Reviewed-by: Glenn Kennard + +commit d73c1c1304a205c8bf6d1cad1dd1d9a421ce2f32 +Author: Marek Olšák +Date: Sat Feb 28 00:44:19 2015 +0100 + + radeonsi: add support for FMA + + Reviewed-by: Tom Stellard + Reviewed-by: Glenn Kennard + +commit dfea35666e8031e9565a51eda1ee98837dbd044f +Author: Marek Olšák +Date: Fri Feb 27 18:39:40 2015 +0100 + + gallium/radeon: don't use LLVMReadOnlyAttribute for ALU + + None of the instructions use a pointer argument. 
+ (+ small cosmetic changes) + + Reviewed-by: Tom Stellard + +commit 9da9c8e3f4f8f06c32efa8344b0a995d34c3b592 +Author: Marek Olšák +Date: Sat Feb 28 00:34:53 2015 +0100 + + tgsi: handle bitwise opcodes in tgsi_opcode_infer_type (v2) + + v2: set the same types as the destination type in tgsi_exec + + Reviewed-by: Ilia Mirkin + +commit 216543ea547dd0572d9f2f0364f7a239a5aeafe1 +Author: Marek Olšák +Date: Sat Feb 28 00:26:31 2015 +0100 + + gallium: add FMA and DFMA opcodes (v3) + + Needed by ARB_gpu_shader5. + + v2: select DMAD for FMA with double precision + v3: add and select DFMA + + Reviewed-by: Ilia Mirkin + +commit e92bc6b38e90339a394e95a562bcce35c3ee9696 +Author: Rob Clark +Date: Sun Mar 15 17:59:01 2015 -0400 + + freedreno: update generated headers + + Fix a3xx texture layer-size. + + Signed-off-by: Rob Clark + Cc: "10.4 10.5" + +commit d3fb949c039b80385d35c11ca86e8e7c5a84ae44 +Author: Rob Clark +Date: Wed Mar 11 15:10:25 2015 -0400 + + freedreno/ir3: remove old compiler + + Now that piglit is no longer falling back to old compiler for any tests, + we can remove it. Hurray \o/ + + Signed-off-by: Rob Clark + +commit feb858b788cf27b31d12ad8a00805f015d4063cc +Author: Rob Clark +Date: Wed Mar 11 13:21:42 2015 -0400 + + freedreno/ir3: avoid scheduler deadlock + + Deadlock can occur if we schedule an address register write, yet some + instructions which depend on that address register value also depend on + other unscheduled instructions that depend on a different address + register value. To solve this, before scheduling an address register + write, ensure that all the other dependencies of the instructions which + consume this address register are already scheduled. 
+ + Signed-off-by: Rob Clark + +commit 7208e96bb810a7a6c92fd11bb7f4df8c9b7f1a2d +Author: Rob Clark +Date: Wed Mar 11 12:36:26 2015 -0400 + + freedreno/ir3: bit of cleanup + + Add an array_insert() macro to simplify inserting into dynamically sized + arrays, add a comment, and remove unused prototype inherited from the + original freedreno.git/fdre-a3xx test code, etc. + + Signed-off-by: Rob Clark + +commit db095eb43bd02414e71f93e72ff61b463bef0ece +Author: Kenneth Graunke +Date: Fri Mar 13 14:34:06 2015 -0700 + + i965: De-duplicate is_expression_commutative() functions. + + Create a backend_inst::is_commutative() method to replace two static + functions that did the exact same thing. + + Signed-off-by: Kenneth Graunke + Reviewed-by: Topi Pohjolainen + +commit f68a973dfb8926ac872b0b0e3b4b5c2163389d06 +Author: Chris Forbes +Date: Mon Dec 8 20:37:00 2014 +1300 + + i965/gen4-5: Cope with immutable-format texture revalidation + + This is unfortunately sometimes necessary due to rebasing levels when + rendering into them. + + 16 piglits crash -> pass, when building mesa with debug enabled. 
+ + Signed-off-by: Chris Forbes + Reviewed-by: Kenneth Graunke + +commit 8ed1b65b62665810291562b59f1e983f7a78a0fc +Author: Emil Velikov +Date: Fri Mar 13 23:36:33 2015 +0000 + + docs: add news item and link release notes for mesa 10.5.1 + + Signed-off-by: Emil Velikov + +commit 5f72847a885518eacafc05d10e1cb52b978ba061 +Author: Emil Velikov +Date: Fri Mar 13 23:32:12 2015 +0000 + + docs: Add sha256 sums for the 10.5.1 release + + Signed-off-by: Emil Velikov + (cherry picked from commit 2abba086ca84f200fae940129c0a5342c3748f00) + +commit 6c966089374d7cb7a7778e91de182f54fc70e07c +Author: Emil Velikov +Date: Fri Mar 13 22:32:57 2015 +0000 + + Add release notes for the 10.5.1 release + + Signed-off-by: Emil Velikov + (cherry picked from commit 11c0ff60ef19cca84452aa989fb8bb25127473e0) + +commit 620e29b74821fd75b24495ab2bfddea53fc75350 +Author: Ilia Mirkin +Date: Fri Mar 13 01:36:57 2015 -0400 + + freedreno: fix slice pitch calculations + + For example if width were 65, the first slice would get 96 while the + second would get 32. However the hardware appears to expect the second + pitch to be 64, based on halving the 96 (and aligning up to 32). + + This fixes texelFetch piglit tests on a3xx below a certain size. Going + higher they break again, but most likely due to unrelated reasons. + + Signed-off-by: Ilia Mirkin + Cc: "10.4 10.5" + Reviewed-by: Rob Clark + +commit 89b26d5a360ebde11a69f2cdefa66e4d6a2a13fd +Author: Ilia Mirkin +Date: Fri Mar 13 00:53:49 2015 -0400 + + freedreno/a3xx: use the same layer size for all slices + + We only program in one layer size per texture, so that means that all + levels must share one size. This makes the piglit test + + bin/texelFetch fs sampler2DArray + + have the same breakage as its non-array version instead of being + completely off, and makes + + bin/ext_texture_array-gen-mipmap + + start passing. 
+ + Signed-off-by: Ilia Mirkin + Cc: "10.4 10.5" + Reviewed-by: Rob Clark + +commit e76a8dc8ed8af9ea9c99ff4f84948fa834317ee9 +Author: Ian Romanick +Date: Tue Feb 24 20:57:18 2015 -0500 + + i965/vs: Add missing resolve_bool_comparison calls on GEN4 and GEN5 + + The ir_unop_any problem was discovered by some later optimization passes + that generate ir_triop_csel. I was also able to reproduce it by + modifying the gl-2.0-vertexattribpointer vertex shader to generate its + result using + + color = mix(vec4(0, 1, 0, 0), + vec4(1, 0, 0, 0), + bvec4(any(greaterThan(diff, vec4(tolerance))))); + + instead of an if-statement. This also required using #version 130 and + MESA_GLSL_VERSION_OVERRIDE=130. + + I have not nominated this for stable releases because I don't think + there's any way to trigger the problem without GLSL 1.30 or + optimizations that don't exist in stable. + + Signed-off-by: Ian Romanick + Reviewed-by: Abdiel Janulgue + +commit 21ff9bfe1cea8c0a51e9f607cc580df62baa3445 +Author: Chris Forbes +Date: Sat Mar 14 07:10:11 2015 +1300 + + i965/disasm: Fix format strings + + Most of the brw_inst_* api returns 64bit values. This fixes disassembly + of sampler messages, etc. + + Signed-off-by: Chris Forbes + Reviewed-by: Matt Turner + +commit 7c3095d6b71c410fd625ead797c78a0f5376904d +Author: Chris Forbes +Date: Sat Mar 14 07:10:10 2015 +1300 + + i965/disasm: Mark format() as being printf-style. + + This allows us to get warnings from GCC when we mess up the format + strings. + + Signed-off-by: Chris Forbes + Reviewed-by: Matt Turner + +commit 97399fc751a0f9750c4f9585dfed14b662ebec2e +Author: Matt Turner +Date: Wed Mar 11 18:43:56 2015 -0700 + + docs: List ARB_shading_language_packing/EXT_shader_integer_mix. + + Reviewed-by: Carl Worth + Reviewed-by: Marek Olšák + +commit 8d3aa5926b73c67c7dbd4477b7177aaa00c533e5 +Author: Matt Turner +Date: Wed Mar 11 18:14:28 2015 -0700 + + glsl: Expose built-in packing functions under GLSL 4.2. 
+ + ARB_shading_language_packing is part of GLSL 4.2, not 4.0 as I + mistakenly believed. The following functions are available only with + ARB_shading_language_packing, GLSL 4.2 (not GLSL 4.0), or ES 3.0: + + - packSnorm2x16 + - unpackSnorm2x16 + - packHalf2x16 + - unpackHalf2x16 + + Reviewed-by: Carl Worth + Reviewed-by: Marek Olšák + +commit dac2e7deaae2540645a6b485c7d1f47195689116 +Author: Matt Turner +Date: Tue Mar 10 11:41:57 2015 -0700 + + egl: Create queryable strings in eglInitialize(). + + Creating/recreating the strings in eglQueryString() is extra work and + isn't thread-safe, as exhibited by shader-db's run.c using libepoxy. + + Multiple threads in run.c call eglReleaseThread() around the same time. + libepoxy calls eglQueryString() to determine whether eglReleaseThread() + exists, and our EGL implementation passes a pointer to the version + string to libepoxy while simultaneously overwriting the string, leading + to a failure in libepoxy. + + Moreover, the EGL spec says (emphasis mine): + + "eglQueryString returns a pointer to a *static*, zero-terminated string" + + This patch moves some auxiliary functions from eglmisc.c to eglapi.c so + that they may be used to create the extension, API, and version strings + once during eglInitialize(). The auxiliary functions are renamed from + _eglUpdate* to _eglCreate*, and some checks made unnecessary by calling + the functions from eglInitialize() are removed. + + Reviewed-by: Chad Versace + +commit b43bbfa90ace596c8b2e0b3954a5f69924726c59 +Author: Samuel Iglesias Gonsalvez +Date: Tue Feb 24 19:02:57 2015 +0100 + + glsl: optimize (0 cmp x + y) into (-x cmp y). + + The optimization done by commit 34ec1a24d did not take it into account. 
+ + Fixes: + + dEQP-GLES3.functional.shaders.random.all_features.fragment.20 + + Signed-off-by: Samuel Iglesias Gonsalvez + Reviewed-by: Ian Romanick + Reviewed-by: Matt Turner + Cc: "10.4 10.5" + +commit cf6f33ee68ca56df1650762634fa9c038359c3ec +Author: Eduardo Lima Mitev +Date: Thu Mar 12 08:16:09 2015 +0100 + + mesa: Check for valid PBO access in gl(Compressed)Tex(Sub)Image calls + + This patch adds two types of checks to the gl(Compressed)Tex(Sub)Imgage family + of functions when a pixel buffer object is bound to GL_PIXEL_UNPACK_BUFFER: + + - That the buffer is not mapped. + - The total data size is within the boundaries of the buffer size. + + It does so by calling auxiliary validations functions from PBO API: + _mesa_validate_pbo_source() for non-compressed texture calls, and + _mesa_validate_pbo_source_compressed() for compressed texture calls. + + The first check is defined in Section 6.3.2 'Effects of Mapping Buffers + on Other GL Commands' of the GLES 3.1 spec, page 57: + + "Any GL command which attempts to read from, write to, or change the + state of a buffer object may generate an INVALID_OPERATION error if all + or part of the buffer object is mapped. However, only commands which + explicitly describe this error are required to do so. If an error is not + generated, using such commands to perform invalid reads, writes, or + state changes will have undefined results and may result in GL + interruption or termination." + + Similar wording exists in GL 4.5 spec, page 76. + + In the case of gl(Compressed)Tex(Sub)Image(2,3)D, the specification doesn't force + implemtations to throw an error. However since Mesa don't currently implement + checks to determine when it is safe to read/write from/to a mapped PBO, we + should always return the error if all or parts of it are mapped. 
+ + The 2nd check is defined in Section 8.5 'Texture Image Specification' of the + OpenGL 4.5 spec, page 203: + + "An INVALID_OPERATION error is generated if a pixel unpack buffer object + is bound and storing texture data would access memory beyond the end of + the pixel unpack buffer." + + Fixes 4 dEQP tests: + * dEQP-GLES3.functional.negative_api.texture.compressedteximage2d_invalid_buffer_target + * dEQP-GLES3.functional.negative_api.texture.compressedtexsubimage2d_invalid_buffer_target + * dEQP-GLES3.functional.negative_api.texture.compressedteximage3d_invalid_buffer_target + * dEQP-GLES3.functional.negative_api.texture.compressedtexsubimage3d_invalid_buffer_target + + Reviewed-by: Laura Ekstrand + +commit 7c084752c612c1763212830618ee0a86f4edf8f6 +Author: Eduardo Lima Mitev +Date: Thu Mar 12 08:14:03 2015 +0100 + + mesa: Separate PBO validation checks from buffer mapping, to allow reuse + + Internal PBO functions such as _mesa_map_validate_pbo_source() and + _mesa_validate_pbo_compressed_teximage() perform validation and buffer mapping + within the same call. + + This patch takes out the validation into separate functions to allow reuse + of functionality by other code (i.e, gl(Compressed)Tex(Sub)Image). + + Reviewed-by: Laura Ekstrand + +commit 7b5bb97cefbf1d0cfef28bc974ee9a68024e3b45 +Author: Eduardo Lima Mitev +Date: Thu Mar 5 09:20:11 2015 +0100 + + mesa: Set the correct image size in _mesa_validate_pbo_access() + + _mesa_validate_pbo_access() provides a generic way to check that a + requested pixel transfer operation on a PBO falls within the + boundaries of the buffer. It is used in various other places, and + depending on the caller, some arguments are used or not. + + In particular, the 'clientMemSize' argument is used only by calls + that are knowledgeable of the total size of the user data involved + in a pixel transfer, such as the case of compressed texture image + calls. 
Other calls don't provide 'clientMemSize' directly since it + is made implicit from the size and format of the texture, and its + data type. In these cases, a sufficiently big value is passed to + 'clientMemSize' (INT_MAX) to avoid an incorrect constrain. + + The problem is that _mesa_validate_pbo_access() use uint + pointers to make the calculations, which are 64 bits long in 64 + bits platforms, meanwhile the dummy INT_MAX passed in 'clientMemSize' + is just 32 bits. This causes a constrain that is not desired. + + This patch fixes that by checking that if 'clientMemSize' is MAX_INT, + then UINTPTR_MAX is assumed instead. + + This is an ugly workaround to the fact that _mesa_validate_pbo_access() + intends to be a one function fits all. The clean solution here would + be to break it into different functions that provide the adequate API + for each of the possible code paths and validation needs. + + Since there are callers relying on passing INT_MAX to 'clientMemSize', + this patch is necessary to deal with the problem above while a cleaner + implementation of the PBO API is not implemented. + + Reviewed-by: Laura Ekstrand + +commit f6f7bfb5e1308593df9642aa8f46a17e8ce340a2 +Author: Eduardo Lima Mitev +Date: Tue Mar 10 19:33:30 2015 +0100 + + meta: Remove error checks for texture <-> pixel-buffer transfers that don't belong in driver code + + The implementation of texture <-> pixel-buffer transfers in drivers common layer + includes certain error checks and argument validation that don't belong there, + considering how the Mesa codebase is laid out. These are higher level + validations that, if necessary, should be performed earlier (i.e, in GL API + entry points). + + This patch simply removes these error checks from driver code. + + For more information, see discussion at + http://lists.freedesktop.org/archives/mesa-dev/2015-February/077417.html. 
+ + Reviewed-by: Laura Ekstrand + +commit 558dcd877095a27ce5de8198744f2f95ddf66b2a +Author: Brian Paul +Date: Thu Mar 12 15:50:20 2015 -0600 + + util: convert slab macros to inline functions + + Reviewed-by: Jose Fonseca + +commit d24a20e967303e1172928937fb7f96f010d4a99c +Author: Brian Paul +Date: Thu Mar 12 08:35:38 2015 -0600 + + egl: fix cast to silence compiler warning + + eglcurrent.c: In function '_eglSetTSD': + eglcurrent.c:57:4: warning: passing argument 2 of 'tss_set' discards + 'const' qualifier from pointer target type [enabled by default] + tss_set(_egl_TSD, (const void *) t); + ^ + In file included from ../../../include/c11/threads.h:72:0, + from eglcurrent.c:32: + ../../../include/c11/threads_posix.h:357:1: note: expected 'void *' + but argument is of type 'const void *' + tss_set(tss_t key, void *val) + ^ + + Reviewed-by: Emil Velikov + +commit a38e6c4fbd6a941ec9ffb98bdf9f040cccda0247 +Author: Alexandre Demers +Date: Thu Mar 12 20:50:08 2015 -0400 + + gallivm: (trivial) Fix typo in comment introduced by 70dc8a + + Fix typo in comment introduced by 70dc8a + + Signed-off-by: Alexandre Demers + Signed-off-by: Jose Fonseca + +commit 1a469a34d517d4c24c60a613c7d1a56f77778c8e +Author: Seán de Búrca +Date: Sat Mar 7 02:23:53 2015 -0700 + + mesa: improve ARB_copy_image internal format compat check + + The memory layout of compatible internal formats may differ in bytes per + block, so TexFormat is not a reliable measure of compatibility. For example, + GL_RGB8 and GL_RGB8UI are compatible formats, but GL_RGB8 may be laid out in + memory as B8G8R8X8. If GL_RGB8UI has a 3 byte-per-block memory layout, the + existing compatibility check will fail. + + Additionally, the current check allows any two compressed textures which share + block size to be used, whereas the spec gives an explicit table of compatible + formats. + + v2: Use a switch instead of array iteration for block class and show the + correct GL error when internal formats are mismatched. 
+ v3: Include spec citations for new compatibility checks, rearrange check + order to ensure that compressed, view-compatible formats return the + correct result, and make style fixes. Original commit message amended + for clarity. + v4: Reformatted spec citations. + + Reviewed-by: Jason Ekstrand + +commit f3e4b2c9d2087c7f655d323cc6b4150313fc0128 +Author: Kenneth Graunke +Date: Mon Mar 9 18:36:31 2015 -0700 + + nir: Fix non-determinism in nir_lower_vars_to_ssa(). + + Previously, we stored derefs in a hash table, using the malloc'd pointer + as the key. Then, we walked through the hash table and generated code, + based on the order of the hash table's elements. + + Memory addresses returned by malloc are pretty much random, which meant + that the hash was random, and the hash table's elements would be walked + in some random order. This led to successive compiles of the same + shader using different variable names and slightly different orderings + of phi-nodes. Code could not be diff'd, and the final assembly would + sometimes change slightly too. + + It turns out the only point of the hash table was to avoid inserting + the same node multiple times for different dereferences. We never + actually searched the hash table! This patch uses an intrusive + linked list instead. Since exec_list uses head and tail sentinels, + checking prev or next against NULL will tell us whether the node is + already in the list. + + Pair programming with Jason Ekstrand. + + Signed-off-by: Jason Ekstrand + Signed-off-by: Kenneth Graunke + Reviewed-by: Connor Abbott + +commit 67388c1ef27e9ff4d7f60a496dbaea4b290dc741 +Author: Jason Ekstrand +Date: Mon Mar 9 18:36:30 2015 -0700 + + util: Fix foreach_list_typed_safe when exec_node is not at offset 0. + + __next and __prev are pointers to the structure containing the exec_node + link, not the embedded exec_node. NULL checks would fail unless the + embedded exec_node happened to be at offset 0 in the parent struct. 
+ + v2: Jason Ekstrand : + Use "(__node)->__field.next != NULL" to check for the end of the list + instead of the "&__next->__field != NULL". The former is far more + obviously correct as it matches what the non-safe versions do. The + original code tried to avoid any use of __next as the client code may + delete it during its execution. However, since the looping condition is + checked after the iteration clause but before the client code is + executed, we know that __node is valid during the looping condition. + + Signed-off-by: Jason Ekstrand + Reviewed-by: Matt Turner + Reviewed-by: Connor Abbott + Reviewed-by: Kenneth Graunke + +commit 547c760964bcad23a056e5156e4fefd7487c0192 +Author: Kenneth Graunke +Date: Mon Mar 9 01:58:59 2015 -0700 + + i965: Use NIR for scalar VS when INTEL_USE_NIR is set. + + Signed-off-by: Kenneth Graunke + Reviewed-by: Jason Ekstrand + +commit 7ef0b6b367f73e24e6dd47a15d439775d3dd1297 +Author: Kenneth Graunke +Date: Mon Mar 9 01:58:58 2015 -0700 + + i965/fs: Add VS output support to nir_setup_outputs(). + + Adapted from fs_visitor::visit(ir_variable *). + + Signed-off-by: Kenneth Graunke + Reviewed-by: Jason Ekstrand + +commit eb137117b7db6c78d6a1662730524d622301c708 +Author: Kenneth Graunke +Date: Mon Mar 9 01:58:57 2015 -0700 + + i965/fs: Handle VS inputs in the NIR backend. + + (Jason noted that this is not a good long term solution, and we should + instead improve nir_lower_io so that this extra set of MOVs is + unnecessary. I tend to agree, but decided we could do that as a + follow-up improvement.) + + Signed-off-by: Kenneth Graunke + Reviewed-by: Jason Ekstrand + +commit a5c4e7fcf52c048c02e4ee14413a574b4ff3695e +Author: Kenneth Graunke +Date: Mon Mar 9 01:58:56 2015 -0700 + + i965/fs: Refactor fs_visitor::nir_setup_inputs(). + + No functional change. In preparation for supporting vertex shaders, + this adds a switch statement on shader stage (since vertex attributes + and fragment shader varyings will need different handling). 
It also + renames "varying" to "input", to be more general. + + Signed-off-by: Kenneth Graunke + Reviewed-by: Topi Pohjolainen + Reviewed-by: Jason Ekstrand + +commit 34628a838aa96643be02cd23eb55af50025dd422 +Author: Kenneth Graunke +Date: Mon Mar 9 01:58:55 2015 -0700 + + i965: Implement NIR intrinsics for loading VS system values. + + Signed-off-by: Kenneth Graunke + Reviewed-by: Jason Ekstrand + +commit 2c79f6f9c339448b5361f080e8f373cea5de3179 +Author: Kenneth Graunke +Date: Mon Mar 9 01:58:54 2015 -0700 + + nir: Add intrinsics for SYSTEM_VALUE_BASE_VERTEX and VERTEX_ID_ZERO_BASE + + Ian and I added these around the time Connor was developing NIR. Now + that both exist, we should make them work together! + + Signed-off-by: Kenneth Graunke + Reviewed-by: Ian Romanick + Reviewed-by: Jason Ekstrand + +commit b9dea9bc45299f19c445170a4cac27810547de00 +Author: Kenneth Graunke +Date: Mon Mar 9 01:58:53 2015 -0700 + + i965/nir: Lower to registers a bit later. + + We can't safely call nir_optimize() with register present, since several + passes called in the loop can't handle registers, and will fail asserts. + + Notably, nir_lower_vec_alus() and nir_opt_algebraic() really don't want + registers. + + Signed-off-by: Kenneth Graunke + Reviewed-by: Jason Ekstrand + +commit 1f0067811c059fb3b284a2169e94fbdec7a4b909 +Author: Kenneth Graunke +Date: Mon Mar 9 01:58:52 2015 -0700 + + i965/nir: Optimize after nir_lower_var_copies(). + + Array variable copy splitting generates a bunch of stuff we want to + clean up before proceeding. + + Signed-off-by: Kenneth Graunke + Reviewed-by: Jason Ekstrand + +commit 1d8ef6ba606a88239de633e5abcc19471c9d3cf4 +Author: Kenneth Graunke +Date: Mon Mar 9 01:58:51 2015 -0700 + + i965/fs: Store a pointer to brw_sampler_prog_key_data in the visitor. + + The NIR backend hardcodes brw_wm_prog_key at the moment, which won't + work when we support scalar VS. We could use get_tex(), but it's a + static method. 
I was going to promote it to fs_visitor, but then + realized that both parameters (stage and key) are already members. + + It then occured to me that we could just set up a pointer in the + constructor, and skip having a function altogether. + + This patch also converts all existing users to use key_tex. + + v2: Make key_tex a "const brw_sampler_prog_key_data *" instead of + non-const; word-wrap some lines. (Review comments from Topi.) + + Signed-off-by: Kenneth Graunke + Reviewed-by: Topi Pohjolainen + Reviewed-by: Jason Ekstrand + +commit 48b0a3c1c9d829a9b1d401afb2796b35df94a5d7 +Author: Brian Paul +Date: Wed Mar 11 17:10:53 2015 -0600 + + tnl: HAVE_LE32_VERTS is never defined, remove associated code + + Reviewed-by: Matt Turner + +commit 6d3b86c3afe4ee1bfb29c322b3d36131139cbab9 +Author: Brian Paul +Date: Wed Mar 11 16:54:15 2015 -0600 + + mesa: move LONGSTRING into generated enums.c + + enums.c is the only place this directive is needed. + + Reviewed-by: Matt Turner + +commit f8ed0bbfef7b8e8098cb3263a196689dbd280758 +Author: Brian Paul +Date: Wed Mar 11 08:38:09 2015 -0600 + + mesa: remove _ASMAPI, ASMAPIP + + Reviewed-by: Ian Romanick + +commit 09ffa04cd9c560b7a8c6d8ac80e3d59c49c5ef70 +Author: Brian Paul +Date: Wed Mar 11 08:33:21 2015 -0600 + + mesa: remove _XFORMAPI + + Reviewed-by: Ian Romanick + +commit 10035361b5c23483f236c59fe13c23153455e5c9 +Author: Brian Paul +Date: Wed Mar 11 08:29:56 2015 -0600 + + swrast: remove _BLENDAPI + + _BLENDAPI boils down to __cdecl on Windows, but __cdecl is the default + calling convention so this serves no purpose. 
+ + Reviewed-by: Ian Romanick + +commit 6ca5eaf49ce184009571f58fb94865cf788e8907 +Author: Brian Paul +Date: Sat Mar 7 13:15:22 2015 -0700 + + mesa: use ARRAY_SIZE in _mesa_QueryMatrixxOES() + + Reviewed-by: Matt Turner + +commit c3984c1155bc78b45001f90ed1333bbacfc32151 +Author: Brian Paul +Date: Sat Mar 7 13:15:22 2015 -0700 + + mesa: remove register keyword, add const in _mesa_QueryMatrixxOES() + + Reviewed-by: Matt Turner + +commit 97f6d50f7247c40eeef33219e5cf5ccb7bf6d4ea +Author: Brian Paul +Date: Sat Mar 7 13:15:22 2015 -0700 + + mesa: reindent querymatrix.c + + Use 3-space indents, not 4. Move some comments after the case statements. + + Acked-by: Matt Turner + +commit be4e198be00c03e88315058eb81187a9547e3e87 +Author: Brian Paul +Date: Sat Mar 7 13:15:22 2015 -0700 + + mesa: move fpclassify work-arounds into c99_math.h + + v2: Use #error in the #else clause, per Jose. + + Reviewed-by: Jose Fonseca + +commit 70dc8a9930f561d7ce6db7e58b5bc9b4d940e37b +Author: Jose Fonseca +Date: Thu Mar 12 09:57:43 2015 +0000 + + gallivm: Prevent double delete on LLVM 3.6 + + std::unique_ptr takes ownership of MM, and a double delete could ensure + in case of an error, as pointed out by Chris Vine in + https://bugs.freedesktop.org/show_bug.cgi?id=89387 + + Reviewed-by: Chris Vine + +commit 30916a5ef008a84e53d9821ccc11a0dee50fe77b +Author: Emil Velikov +Date: Mon Mar 9 11:46:07 2015 +0000 + + autogen.sh: pass --force to autoreconf, quote ORIGDIR + + By passing --force autoreconf will update all the aux files, which would + otherwise be ignored if one updates autoconf/automake. + + Quote the ORIGDIR variable to prevent fall-outs, when its name contains + space. 
+ + Signed-off-by: Emil Velikov + Reviewed-by: Matt Turner + +commit a385d18598b28bf935e4460b86ce3f9e095a8015 +Author: Emil Velikov +Date: Fri Mar 6 16:54:59 2015 +0000 + + glx: remove support for non-multithreaded platforms + + Implicitly required for a while, although commit 9385c592c68 (mapi: + remove u_thread.h) was the one that put the final nail on the + coffin. + + Signed-off-by: Emil Velikov + Reviewed-by: Brian Paul + +commit 42144170d189d2539a4fb2243200e760114af9f7 +Author: Emil Velikov +Date: Fri Mar 6 16:54:58 2015 +0000 + + glx: remove final reference to THREADS + + Left over from commit 18db13f5865(mapi: THREADS was always defined, + remove it) + + Signed-off-by: Emil Velikov + Reviewed-by: Brian Paul + +commit 39f90e6b9bb0c9c8b40abae2afde07587cd49010 +Author: Emil Velikov +Date: Fri Mar 6 16:54:57 2015 +0000 + + configure: require pthreads for POSIX builds + + This has been an implicit rule for building mesa for a long time. Let's + make it official and just bail out at configure time. This way we can + cleaning up some of our glx code. + + Signed-off-by: Emil Velikov + Reviewed-by: Brian Paul + +commit a806df3f23cd5fadffbfe818f8c5af7e17205426 +Author: Emil Velikov +Date: Fri Mar 6 16:54:56 2015 +0000 + + egl/main: convert thread management to use c11 threads + + Convert the code to use the C11 threads implementation, and nuke the + Windows non-pthreads code-path. The c11/threads_win32.h abstraction + should be better than the current code. + + Signed-off-by: Emil Velikov + Reviewed-by: Brian Paul + +commit efe87f1a801c61d087cd2b29a2c150453241c3d4 +Author: Emil Velikov +Date: Fri Mar 6 16:54:55 2015 +0000 + + egl/main: use c11/threads' mutex directly + + Remove the inline wrappers/abstraction layer. 
+ + Signed-off-by: Emil Velikov + Reviewed-by: Brian Paul + +commit 90e50908d7f080d91f41d889cfe0dc67134971eb +Author: Jason Ekstrand +Date: Mon Mar 2 17:59:38 2015 -0800 + + nir/worklist: Don't change the start index when computing the tail index + + Reviewed-by: Mark Janes + +commit 8fb8fe46fa565dceedacd95287c836004b0fade2 +Author: Thomas Helland +Date: Sat Feb 28 20:32:32 2015 +0100 + + nir: Optimize a + neg(a) + + Shader-db i965 instructions: + total instructions in shared programs: 1711180 -> 1711159 (-0.00%) + instructions in affected programs: 825 -> 804 (-2.55%) + helped: 9 + HURT: 0 + GAINED: 3 + LOST: 3 + + Shader-db NIR instructions: + total instructions in shared programs: 606187 -> 606179 (-0.00%) + instructions in affected programs: 298 -> 290 (-2.68%) + helped: 4 + HURT: 0 + GAINED: 0 + LOST: 0 + + Reviewed-by: Matt Turner + Reviewed-by: Jason Ekstrand + Signed-off-by: Thomas Helland + +commit 0525f2e851f5f6f53b5f83c8dcdfa48f9838133b +Author: Thomas Helland +Date: Sat Feb 28 20:32:31 2015 +0100 + + nir: Optimize (a*b)+(a*c) -> a*(b+c) + + Shader-db i965 instructions: + total instructions in shared programs: 1715894 -> 1710802 (-0.30%) + instructions in affected programs: 443080 -> 437988 (-1.15%) + helped: 1502 + HURT: 13 + GAINED: 4 + LOST: 4 + + Shader-db NIR instructions: + total instructions in shared programs: 607710 -> 606187 (-0.25%) + instructions in affected programs: 208285 -> 206762 (-0.73%) + helped: 769 + HURT: 8 + GAINED: 0 + LOST: 0 + + Reviewed-by: Matt Turner + Reviewed-by: Jason Ekstrand + Signed-off-by: Thomas Helland + +commit 09b03254091d054800834ddee604885a1093673f +Author: Marius Predut +Date: Wed Mar 11 03:25:00 2015 -0600 + + vbo: improve the code style by adjust the preprocessing c code directives + + Brian Paul review suggestion: there's more macro use here than necessary. + Removed and redefine some #define preprocessing directives. + Removed the directive input parameter 'T' . + No functional changes. 
+ + Signed-off-by: Marius Predut + Reviewed-by: Brian Paul + +commit 9816acff2ca570e248652fe05ac4ee3ce02bd2ab +Author: Brian Paul +Date: Sun Mar 8 16:46:39 2015 -0600 + + mesa: remove CPU_TO_LE32() for AIX + + This is the only remnant of AIX-specific code in Mesa. Probably long + unused. + + Reviewed-by: Ian Romanick + +commit 3158b3abb34abc9f61e4b5161411e5e83640d42d +Author: Brian Paul +Date: Sun Mar 8 16:44:28 2015 -0600 + + mesa: remove #define __volatile + + Not actually used anwhere in Mesa. + + Reviewed-by: Ian Romanick + +commit d7193ce42cedc4cc7839fc4522edf5724e954c80 +Author: Brian Paul +Date: Sat Mar 7 13:15:22 2015 -0700 + + mesa: use strdup() instead of _mesa_strdup() + + We were already using strdup() in various places in Mesa. Get rid + of the _mesa_strdup() wrapper. All the callers pass a non-NULL + argument so the NULL check isn't needed either. + + Reviewed-by: Jose Fonseca + Reviewed-by: Ian Romanick + +commit 5376bc74ccfac0d1a4df6c5652e075d99e3f4fe4 +Author: Brian Paul +Date: Sat Mar 7 13:15:22 2015 -0700 + + st/glx: use strdup() instead of _mesa_strdup() + + Reviewed-by: Jose Fonseca + Reviewed-by: Ian Romanick + +commit 279c5965aa501e6b8f8432b1213f917298154d6c +Author: Brian Paul +Date: Sat Mar 7 13:15:22 2015 -0700 + + xlib: use strdup() instead of _mesa_strdup() + + Reviewed-by: Jose Fonseca + Reviewed-by: Ian Romanick + +commit 14ba6c9325229270d3f04c13253ca547f9a216ff +Author: Brian Paul +Date: Tue Mar 10 08:18:27 2015 -0600 + + i915: add parens to silence operator precedence warning + + Signed-off-by: Brian Paul + +commit 6ac1bc90c4a7a6f32901a9782e14b090f6fe5270 +Author: Iago Toral Quiroga +Date: Tue Mar 10 11:36:43 2015 +0100 + + i965: Fix out-of-bounds accesses into pull_constant_loc array + + The piglit test glsl-fs-uniform-array-loop-unroll.shader_test was designed + to do an out of bounds access into an uniform array to make sure that we + handle that situation gracefully inside the driver, however, as Ken describes + in bug 79202, 
Valgrind reports that this is leading to an out-of-bounds access + in fs_visitor::demote_pull_constants(). + + Before accessing the pull_constant_loc array we should make sure that + the uniform we are trying to access is valid. + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=79202 + Reviewed-by: Matt Turner + commit 5750595ca97b2f8f18d22af35b431a6c66dd899a Author: Jordan Justen Date: Sat Feb 21 15:05:22 2015 -0800 diff -Nru mesa-10.6.0~git20150310.5750595c/common.py mesa-10.6.0~git20150318.27bf37ba/common.py --- mesa-10.6.0~git20150310.5750595c/common.py 2014-07-15 16:33:01.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/common.py 2015-03-18 08:43:34.000000000 +0000 @@ -26,28 +26,28 @@ target_platform = host_platform _machine_map = { - 'x86': 'x86', - 'i386': 'x86', - 'i486': 'x86', - 'i586': 'x86', - 'i686': 'x86', - 'BePC': 'x86', - 'Intel': 'x86', - 'ppc' : 'ppc', - 'BeBox': 'ppc', - 'BeMac': 'ppc', - 'AMD64': 'x86_64', - 'x86_64': 'x86_64', - 'sparc': 'sparc', - 'sun4u': 'sparc', + 'x86': 'x86', + 'i386': 'x86', + 'i486': 'x86', + 'i586': 'x86', + 'i686': 'x86', + 'BePC': 'x86', + 'Intel': 'x86', + 'ppc': 'ppc', + 'BeBox': 'ppc', + 'BeMac': 'ppc', + 'AMD64': 'x86_64', + 'x86_64': 'x86_64', + 'sparc': 'sparc', + 'sun4u': 'sparc', } # find host_machine value if 'PROCESSOR_ARCHITECTURE' in os.environ: - host_machine = os.environ['PROCESSOR_ARCHITECTURE'] + host_machine = os.environ['PROCESSOR_ARCHITECTURE'] else: - host_machine = _platform.machine() + host_machine = _platform.machine() host_machine = _machine_map.get(host_machine, 'generic') default_machine = host_machine @@ -65,7 +65,8 @@ default_llvm = 'no' try: if target_platform != 'windows' and \ - subprocess.call(['llvm-config', '--version'], stdout=subprocess.PIPE) == 0: + subprocess.call(['llvm-config', '--version'], + stdout=subprocess.PIPE) == 0: default_llvm = 'yes' except: pass @@ -75,30 +76,38 @@ # Common options def AddOptions(opts): - try: - from SCons.Variables.BoolVariable import 
BoolVariable as BoolOption - except ImportError: - from SCons.Options.BoolOption import BoolOption - try: - from SCons.Variables.EnumVariable import EnumVariable as EnumOption - except ImportError: - from SCons.Options.EnumOption import EnumOption - opts.Add(EnumOption('build', 'build type', 'debug', - allowed_values=('debug', 'checked', 'profile', 'release'))) - opts.Add(BoolOption('verbose', 'verbose output', 'no')) - opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine, - allowed_values=('generic', 'ppc', 'x86', 'x86_64'))) - opts.Add(EnumOption('platform', 'target platform', host_platform, - allowed_values=('cygwin', 'darwin', 'freebsd', 'haiku', 'linux', 'sunos', 'windows'))) - opts.Add(BoolOption('embedded', 'embedded build', 'no')) - opts.Add(BoolOption('analyze', 'enable static code analysis where available', 'no')) - opts.Add('toolchain', 'compiler toolchain', default_toolchain) - opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no')) - opts.Add(BoolOption('llvm', 'use LLVM', default_llvm)) - opts.Add(BoolOption('openmp', 'EXPERIMENTAL: compile with openmp (swrast)', 'no')) - opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes')) - opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no')) - opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes')) - opts.Add(BoolOption('texture_float', 'enable floating-point textures and renderbuffers', 'no')) - if host_platform == 'windows': - opts.Add('MSVC_VERSION', 'Microsoft Visual C/C++ version') + try: + from SCons.Variables.BoolVariable import BoolVariable as BoolOption + except ImportError: + from SCons.Options.BoolOption import BoolOption + try: + from SCons.Variables.EnumVariable import EnumVariable as EnumOption + except ImportError: + from SCons.Options.EnumOption import EnumOption + opts.Add(EnumOption('build', 'build type', 'debug', + allowed_values=('debug', 'checked', 'profile', + 'release'))) + opts.Add(BoolOption('verbose', 
'verbose output', 'no')) + opts.Add(EnumOption('machine', 'use machine-specific assembly code', + default_machine, + allowed_values=('generic', 'ppc', 'x86', 'x86_64'))) + opts.Add(EnumOption('platform', 'target platform', host_platform, + allowed_values=('cygwin', 'darwin', 'freebsd', 'haiku', + 'linux', 'sunos', 'windows'))) + opts.Add(BoolOption('embedded', 'embedded build', 'no')) + opts.Add(BoolOption('analyze', + 'enable static code analysis where available', 'no')) + opts.Add('toolchain', 'compiler toolchain', default_toolchain) + opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', + 'no')) + opts.Add(BoolOption('llvm', 'use LLVM', default_llvm)) + opts.Add(BoolOption('openmp', 'EXPERIMENTAL: compile with openmp (swrast)', + 'no')) + opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes')) + opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no')) + opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes')) + opts.Add(BoolOption('texture_float', + 'enable floating-point textures and renderbuffers', + 'no')) + if host_platform == 'windows': + opts.Add('MSVC_VERSION', 'Microsoft Visual C/C++ version') diff -Nru mesa-10.6.0~git20150310.5750595c/configure.ac mesa-10.6.0~git20150318.27bf37ba/configure.ac --- mesa-10.6.0~git20150310.5750595c/configure.ac 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/configure.ac 2015-03-18 08:43:34.000000000 +0000 @@ -658,6 +658,9 @@ ;; *) AX_PTHREAD + if test "x$ax_pthread_ok" = xno; then + AC_MSG_ERROR([Building mesa on this platform requires pthreads]) + fi ;; esac dnl AX_PTHREADS leaves PTHREAD_LIBS empty for gcc and sets PTHREAD_CFLAGS diff -Nru mesa-10.6.0~git20150310.5750595c/debian/changelog mesa-10.6.0~git20150318.27bf37ba/debian/changelog --- mesa-10.6.0~git20150310.5750595c/debian/changelog 2015-03-18 09:12:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/debian/changelog 2015-03-18 09:12:22.000000000 +0000 @@ -1,7 +1,7 @@ -mesa 
(10.6.0~git20150310.5750595c-0ubuntu0ricotz3~utopic) utopic; urgency=medium +mesa (10.6.0~git20150318.27bf37ba-0ubuntu0ricotz~utopic) utopic; urgency=medium - * Checkout from git 20150310 (master branch) up to commit - 5750595ca97b2f8f18d22af35b431a6c66dd899a + * Checkout from git 20150318 (master branch) up to commit + 27bf37ba05b69ebf6f373d1637a26b4839265921 * Only added debian/ tree from origin/ubuntu * hook: Disable MIR support. * hook: Relax symbols check. @@ -12,7 +12,7 @@ * hook: Drop skl-use-vec4-datatypes-for-message-header.diff (upstream) * hook: update symbols. - -- Rico Tzschichholz Tue, 10 Mar 2015 17:53:27 +0100 + -- Rico Tzschichholz Wed, 18 Mar 2015 09:44:53 +0100 mesa (10.5.0-0ubuntu1) vivid; urgency=medium diff -Nru mesa-10.6.0~git20150310.5750595c/docs/dispatch.html mesa-10.6.0~git20150318.27bf37ba/docs/dispatch.html --- mesa-10.6.0~git20150310.5750595c/docs/dispatch.html 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/docs/dispatch.html 2015-03-18 08:43:34.000000000 +0000 @@ -204,9 +204,8 @@ few preprocessor defines.

    -
  • If GLX_USE_TLS is defined, method #4 is used.
  • -
  • If HAVE_PTHREAD is defined, method #3 is used.
  • -
  • If WIN32_THREADS is defined, method #2 is used.
  • +
  • If GLX_USE_TLS is defined, method #3 is used.
  • +
  • If HAVE_PTHREAD is defined, method #2 is used.
  • If none of the preceding are defined, method #1 is used.
diff -Nru mesa-10.6.0~git20150310.5750595c/docs/GL3.txt mesa-10.6.0~git20150318.27bf37ba/docs/GL3.txt --- mesa-10.6.0~git20150310.5750595c/docs/GL3.txt 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/docs/GL3.txt 2015-03-18 08:43:34.000000000 +0000 @@ -102,11 +102,11 @@ - Dynamically uniform UBO array indices DONE (r600) - Implicit signed -> unsigned conversions DONE - Fused multiply-add DONE () - - Packing/bitfield/conversion functions DONE (r600) + - Packing/bitfield/conversion functions DONE (r600, radeonsi) - Enhanced textureGather DONE (r600, radeonsi) - Geometry shader instancing DONE (r600) - Geometry shader multiple streams DONE () - - Enhanced per-sample shading DONE (r600) + - Enhanced per-sample shading DONE (r600, radeonsi) - Interpolation functions DONE (r600) - New overload resolution rules DONE GL_ARB_gpu_shader_fp64 DONE (nvc0, softpipe) @@ -142,6 +142,7 @@ GL_ARB_shader_image_load_store in progress (curro) GL_ARB_conservative_depth DONE (all drivers that support GLSL 1.30) GL_ARB_shading_language_420pack DONE (all drivers that support GLSL 1.30) + GL_ARB_shading_language_packing DONE (all drivers) GL_ARB_internalformat_query DONE (i965, nv50, nvc0, r300, r600, radeonsi, llvmpipe, softpipe) GL_ARB_map_buffer_alignment DONE (all drivers) @@ -207,6 +208,7 @@ GL_KHR_context_flush_control DONE (all - but needs GLX/EXT extension to be useful) GL_KHR_robust_buffer_access_behavior not started GL_KHR_robustness 90% done (the ARB variant) + GL_EXT_shader_integer_mix DONE (all drivers that support GLSL) These are the extensions cherry-picked to make GLES 3.1 GLES3.1, GLSL ES 3.1 @@ -219,11 +221,12 @@ GL_ARB_shader_atomic_counters DONE (i965) GL_ARB_shader_image_load_store in progress (curro) GL_ARB_shader_storage_buffer_object not started + GL_ARB_shading_language_packing DONE (all drivers) GL_ARB_separate_shader_objects DONE (all drivers) GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) 
GL_ARB_vertex_attrib_binding DONE (all drivers) GS5 Enhanced textureGather DONE (i965, nvc0, r600, radeonsi) - GS5 Packing/bitfield/conversion functions DONE (i965, nvc0, r600) + GS5 Packing/bitfield/conversion functions DONE (i965, nvc0, r600, radeonsi) GL_EXT_shader_integer_mix DONE (all drivers that support GLSL) diff -Nru mesa-10.6.0~git20150310.5750595c/docs/index.html mesa-10.6.0~git20150318.27bf37ba/docs/index.html --- mesa-10.6.0~git20150310.5750595c/docs/index.html 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/docs/index.html 2015-03-18 08:43:34.000000000 +0000 @@ -16,6 +16,12 @@

News

+

March 13, 2015

+

+Mesa 10.5.1 is released. +This is a bug-fix release. +

+

March 06, 2015

Mesa 10.5.0 is released. This is a new diff -Nru mesa-10.6.0~git20150310.5750595c/docs/relnotes/10.5.1.html mesa-10.6.0~git20150318.27bf37ba/docs/relnotes/10.5.1.html --- mesa-10.6.0~git20150310.5750595c/docs/relnotes/10.5.1.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/docs/relnotes/10.5.1.html 2015-03-18 08:43:34.000000000 +0000 @@ -0,0 +1,217 @@ + + + + + Mesa Release Notes + + + + +

+

The Mesa 3D Graphics Library

+
+ + +
+ +

Mesa 10.5.1 Release Notes / March 13, 2015

+ +

+Mesa 10.5.1 is a bug fix release which fixes bugs found since the 10.5.0 release. +

+

+Mesa 10.5.1 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is only available if requested at context creation +because compatibility contexts are not supported. +

+ + +

SHA256 checksums

+
+b5b6256a6d46023e16a675257fd11a0f94d7b3e60a76cf112952da3d0fef8e9b  mesa-10.5.1.tar.gz
+ffc51943d15c6812ee7611d053d8980a683fbd6a4986cff567b12cc66637d679  mesa-10.5.1.tar.xz
+
+ + +

New features

+

None

+ +

Bug fixes

+ +

This list is likely incomplete.

+ +
    + +
  • Bug 79202 - valgrind errors in glsl-fs-uniform-array-loop-unroll.shader_test; random code generation
  • + +
  • Bug 84613 - [G965, bisected] piglit regressions : glslparsertest.glsl2
  • + +
  • Bug 86747 - Noise in Football Manager 2014 textures
  • + +
  • Bug 86974 - INTEL_DEBUG=shader_time always asserts in fs_generator::generate_code() when Mesa is built with --enable-debug (= with asserts)
  • + +
  • Bug 88246 - Commit 2881b12 causes 43 DrawElements test regressions
  • + +
  • Bug 88793 - [BDW/BSW Bisected]Piglit/shaders_glsl-max-varyings fails
  • + +
  • Bug 88883 - ir-a2xx.c: variable changed in assert statement
  • + +
  • Bug 88885 - Transform feedback uses incorrect interleaving if a previous draw did not write gl_Position
  • + +
  • Bug 89095 - [SNB/IVB/BYT Bisected]Webglc conformance/glsl/functions/glsl-function-mix-float.html fails
  • + +
  • Bug 89156 - r300g: GL_COMPRESSED_RED_RGTC1 / ATI1N support broken
  • + +
  • Bug 89224 - Incorrect rendering of Unigine Valley running in VM on VMware Workstation
  • + +
  • Bug 89292 - [regression,bisected] incomplete screenshots in some cases
  • + +
  • Bug 89311 - [regression, bisected] dEQP: Added entry points for glCompressedTextureSubImage*D.
  • + +
  • Bug 89312 - [regression, bisected] main: Added entry points for CopyTextureSubImage*D. (d6b7c40cecfe01)
  • + +
  • Bug 89315 - [HSW, regression, bisected] i965/fs: Emit MAD instructions when possible.
  • + +
  • Bug 89317 - [HSW, regression, bisected] i965: Add LINTERP/CINTERP to can_do_cmod() (d91390634)
  • + +
  • Bug 89416 - UE4Editor crash after load project
  • + +
  • Bug 89430 - [g965][bisected] arb_copy_image-targets gl_texture* tests fail
  • + +
+ + +

Changes

+ +

Andrey Sudnik (1):

+
    +
  • i965/vec4: Don't lose the saturate modifier in copy propagation.
  • +
+ +

Chris Forbes (1):

+
    +
  • i965/gs: Check newly-generated GS-out VUE map against correct stage
  • +
+ +

Daniel Stone (1):

+
    +
  • egl: Take alpha bits into account when selecting GBM formats
  • +
+ +

Emil Velikov (5):

+
    +
  • docs: Add sha256 sums for the 10.5.0 release
  • +
  • egl/main: no longer export internal function
  • +
  • cherry-ignore: ignore a few more commits picked without -x
  • +
  • mapi: fix commit 90411b56f6bc817e229d8801ac0adad6d4e3fb7a
  • +
  • Update version to 10.5.1
  • +
+ +

Frank Henigman (1):

+
    +
  • intel: fix EGLImage renderbuffer _BaseFormat
  • +
+ +

Iago Toral Quiroga (1):

+
    +
  • i965: Fix out-of-bounds accesses into pull_constant_loc array
  • +
+ +

Ian Romanick (1):

+
    +
  • i965/fs/nir: Use emit_math for nir_op_fpow
  • +
+ +

Ilia Mirkin (3):

+
    +
  • freedreno: move fb state copy after checking for size change
  • +
  • freedreno/ir3: fix array count returned by TXQ
  • +
  • freedreno/ir3: get the # of miplevels from getinfo
  • +
+ +

Jason Ekstrand (2):

+
    +
  • meta/TexSubImage: Stash everything other than PIXEL_TRANSFER/store in meta_begin
  • +
  • main/base_tex_format: Properly handle STENCIL_INDEX1/4/16
  • +
+ +

Kenneth Graunke (8):

+
    +
  • i965: Split Gen4-5 BlitFramebuffer code; prefer BLT over Meta.
  • +
  • glsl: Mark array access when copying to a temporary for the ?: operator.
  • +
  • i965/fs: Set force_writemask_all on shader_time instructions.
  • +
  • i965/fs: Set smear on shader_time diff register.
  • +
  • i965/fs: Make emit_shader_time_write return rather than emit.
  • +
  • i965/fs: Make get_timestamp() pass back the MOV rather than emitting it.
  • +
  • i965/fs: Make emit_shader_time_end() insert before EOT.
  • +
  • i965/fs: Don't issue FB writes for bound but unwritten color targets.
  • +
+ +

Laura Ekstrand (2):

+
    +
  • main: Fix target checking for CompressedTexSubImage*D.
  • +
  • main: Fix target checking for CopyTexSubImage*D.
  • +
+ +

Marc-Andre Lureau (1):

+
    +
  • gallium/auxiliary/indices: fix start param
  • +
+ +

Marek Olšák (3):

+
    +
  • r300g: fix RGTC1 and LATC1 SNORM formats
  • +
  • r300g: fix a crash when resolving into an sRGB texture
  • +
  • r300g: fix sRGB->sRGB blits
  • +
+ +

Matt Turner (12):

+
    +
  • i965/vec4: Fix implementation of i2b.
  • +
  • mesa: Indent break statements and add a missing one.
  • +
  • mesa: Free memory allocated for luminance in readpixels.
  • +
  • mesa: Correct backwards NULL check.
  • +
  • i965: Consider scratch writes to have side effects.
  • +
  • i965/fs: Don't use backend_visitor::instructions after creating the CFG.
  • +
  • r300g: Use PATH_MAX instead of limiting ourselves to 100 chars.
  • +
  • r300g: Check return value of snprintf().
  • +
  • i965/fs: Don't propagate cmod to inst with different type.
  • +
  • i965: Tell intel_get_memcpy() which direction the memcpy() is going.
  • +
  • Revert SHA1 additions.
  • +
  • i965: Avoid applying negate to wrong MAD source.
  • +
+ +

Neil Roberts (4):

+
    +
  • meta: In pbo_{Get,}TexSubImage don't repeatedly rebind the source tex
  • +
  • Revert "common: Fix PBOs for 1D_ARRAY."
  • +
  • meta: Allow GL_UN/PACK_IMAGE_HEIGHT in _mesa_meta_pbo_Get/TexSubImage
  • +
  • meta: Fix the y offset for 1D_ARRAY in _mesa_meta_pbo_TexSubImage
  • +
+ +

Rob Clark (11):

+
    +
  • freedreno/ir3: fix silly typo for binning pass shaders
  • +
  • freedreno/a2xx: fix increment in assert
  • +
  • freedreno/a4xx: bit of cleanup
  • +
  • freedreno: update generated headers
  • +
  • freedreno/a4xx: set PC_PRIM_VTX_CNTL.VAROUT properly
  • +
  • freedreno: update generated headers
  • +
  • freedreno/a4xx: aniso filtering
  • +
  • freedreno/ir3: fix up cat6 instruction encodings
  • +
  • freedreno/ir3: add support for memory (cat6) instructions
  • +
  • freedreno/ir3: handle flat bypass for a4xx
  • +
  • freedreno/ir3: fix failed assert in grouping
  • +
+ +

Stefan Dösinger (1):

+
    +
  • r300g: Fix the ATI1N swizzle (RGTC1 and LATC1)
  • +
+ +
+ + diff -Nru mesa-10.6.0~git20150310.5750595c/docs/relnotes.html mesa-10.6.0~git20150318.27bf37ba/docs/relnotes.html --- mesa-10.6.0~git20150310.5750595c/docs/relnotes.html 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/docs/relnotes.html 2015-03-18 08:43:34.000000000 +0000 @@ -21,6 +21,7 @@

    +
  • 10.5.1 release notes
  • 10.5.0 release notes
  • 10.4.6 release notes
  • 10.4.5 release notes diff -Nru mesa-10.6.0~git20150310.5750595c/include/c11/threads_posix.h mesa-10.6.0~git20150318.27bf37ba/include/c11/threads_posix.h --- mesa-10.6.0~git20150310.5750595c/include/c11/threads_posix.h 2014-07-15 16:33:01.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/include/c11/threads_posix.h 2015-03-18 08:43:34.000000000 +0000 @@ -177,13 +177,8 @@ && type != (mtx_try|mtx_recursive)) return thrd_error; pthread_mutexattr_init(&attr); - if ((type & mtx_recursive) != 0) { -#if defined(__linux__) || defined(__linux) - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE_NP); -#else + if ((type & mtx_recursive) != 0) pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); -#endif - } pthread_mutex_init(mtx, &attr); pthread_mutexattr_destroy(&attr); return thrd_success; diff -Nru mesa-10.6.0~git20150310.5750595c/include/c99_math.h mesa-10.6.0~git20150318.27bf37ba/include/c99_math.h --- mesa-10.6.0~git20150310.5750595c/include/c99_math.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/include/c99_math.h 2015-03-18 08:43:34.000000000 +0000 @@ -161,4 +161,48 @@ #endif +#if defined(fpclassify) +/* ISO C99 says that fpclassify is a macro. Assume that any implementation + * of fpclassify, whether it's in a C99 compiler or not, will be a macro. + */ +#elif defined(__cplusplus) +/* For C++, fpclassify() should be defined in */ +#elif defined(_MSC_VER) +/* Not required on VS2013 and above. Oddly, the fpclassify() function + * doesn't exist in such a form on MSVC. This is an implementation using + * slightly different lower-level Windows functions. 
+ */ +#include + +static inline enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL} +fpclassify(double x) +{ + switch(_fpclass(x)) { + case _FPCLASS_SNAN: /* signaling NaN */ + case _FPCLASS_QNAN: /* quiet NaN */ + return FP_NAN; + case _FPCLASS_NINF: /* negative infinity */ + case _FPCLASS_PINF: /* positive infinity */ + return FP_INFINITE; + case _FPCLASS_NN: /* negative normal */ + case _FPCLASS_PN: /* positive normal */ + return FP_NORMAL; + case _FPCLASS_ND: /* negative denormalized */ + case _FPCLASS_PD: /* positive denormalized */ + return FP_SUBNORMAL; + case _FPCLASS_NZ: /* negative zero */ + case _FPCLASS_PZ: /* positive zero */ + return FP_ZERO; + default: + /* Should never get here; but if we do, this will guarantee + * that the pattern is not treated like a number. + */ + return FP_NAN; + } +} +#else +#error "Need to include or define an fpclassify function" +#endif + + #endif /* #define _C99_MATH_H_ */ diff -Nru mesa-10.6.0~git20150310.5750595c/.lastcommit mesa-10.6.0~git20150318.27bf37ba/.lastcommit --- mesa-10.6.0~git20150310.5750595c/.lastcommit 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/.lastcommit 2015-03-18 08:43:35.000000000 +0000 @@ -1 +1 @@ -commit 5750595ca97b2f8f18d22af35b431a6c66dd899a +commit 27bf37ba05b69ebf6f373d1637a26b4839265921 diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/eglapi.c mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglapi.c --- mesa-10.6.0~git20150310.5750595c/src/egl/main/eglapi.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglapi.c 2015-03-18 08:43:34.000000000 +0000 @@ -87,6 +87,8 @@ #include #include #include "c99_compat.h" +#include "c11/threads.h" +#include "eglcompiler.h" #include "eglglobals.h" #include "eglcontext.h" @@ -100,6 +102,7 @@ #include "eglmode.h" #include "eglimage.h" #include "eglsync.h" +#include "eglstring.h" /** @@ -275,7 +278,7 @@ { _EGLDisplay *dpy = _eglLookupDisplay(display); if (dpy) - 
_eglLockMutex(&dpy->Mutex); + mtx_lock(&dpy->Mutex); return dpy; } @@ -286,7 +289,7 @@ static inline void _eglUnlockDisplay(_EGLDisplay *dpy) { - _eglUnlockMutex(&dpy->Mutex); + mtx_unlock(&dpy->Mutex); } @@ -341,6 +344,111 @@ } /** + * Copy the extension into the string and update the string pointer. + */ +static EGLint +_eglAppendExtension(char **str, const char *ext) +{ + char *s = *str; + size_t len = strlen(ext); + + if (s) { + memcpy(s, ext, len); + s[len++] = ' '; + s[len] = '\0'; + + *str += len; + } + else { + len++; + } + + return (EGLint) len; +} + +/** + * Examine the individual extension enable/disable flags and recompute + * the driver's Extensions string. + */ +static void +_eglCreateExtensionsString(_EGLDisplay *dpy) +{ +#define _EGL_CHECK_EXTENSION(ext) \ + do { \ + if (dpy->Extensions.ext) { \ + _eglAppendExtension(&exts, "EGL_" #ext); \ + assert(exts <= dpy->ExtensionsString + _EGL_MAX_EXTENSIONS_LEN); \ + } \ + } while (0) + + char *exts = dpy->ExtensionsString; + + _EGL_CHECK_EXTENSION(MESA_screen_surface); + _EGL_CHECK_EXTENSION(MESA_copy_context); + _EGL_CHECK_EXTENSION(MESA_drm_display); + _EGL_CHECK_EXTENSION(MESA_drm_image); + _EGL_CHECK_EXTENSION(MESA_configless_context); + + _EGL_CHECK_EXTENSION(WL_bind_wayland_display); + _EGL_CHECK_EXTENSION(WL_create_wayland_buffer_from_image); + + _EGL_CHECK_EXTENSION(KHR_image_base); + _EGL_CHECK_EXTENSION(KHR_image_pixmap); + if (dpy->Extensions.KHR_image_base && dpy->Extensions.KHR_image_pixmap) + _eglAppendExtension(&exts, "EGL_KHR_image"); + + _EGL_CHECK_EXTENSION(KHR_vg_parent_image); + _EGL_CHECK_EXTENSION(KHR_get_all_proc_addresses); + _EGL_CHECK_EXTENSION(KHR_gl_texture_2D_image); + _EGL_CHECK_EXTENSION(KHR_gl_texture_cubemap_image); + _EGL_CHECK_EXTENSION(KHR_gl_texture_3D_image); + _EGL_CHECK_EXTENSION(KHR_gl_renderbuffer_image); + + _EGL_CHECK_EXTENSION(KHR_reusable_sync); + _EGL_CHECK_EXTENSION(KHR_fence_sync); + + _EGL_CHECK_EXTENSION(KHR_surfaceless_context); + 
_EGL_CHECK_EXTENSION(KHR_create_context); + + _EGL_CHECK_EXTENSION(NOK_swap_region); + _EGL_CHECK_EXTENSION(NOK_texture_from_pixmap); + + _EGL_CHECK_EXTENSION(ANDROID_image_native_buffer); + + _EGL_CHECK_EXTENSION(CHROMIUM_sync_control); + + _EGL_CHECK_EXTENSION(EXT_create_context_robustness); + _EGL_CHECK_EXTENSION(EXT_buffer_age); + _EGL_CHECK_EXTENSION(EXT_swap_buffers_with_damage); + _EGL_CHECK_EXTENSION(EXT_image_dma_buf_import); + + _EGL_CHECK_EXTENSION(NV_post_sub_buffer); +#undef _EGL_CHECK_EXTENSION +} + +static void +_eglCreateAPIsString(_EGLDisplay *dpy) +{ + if (dpy->ClientAPIs & EGL_OPENGL_BIT) + strcat(dpy->ClientAPIsString, "OpenGL "); + + if (dpy->ClientAPIs & EGL_OPENGL_ES_BIT) + strcat(dpy->ClientAPIsString, "OpenGL_ES "); + + if (dpy->ClientAPIs & EGL_OPENGL_ES2_BIT) + strcat(dpy->ClientAPIsString, "OpenGL_ES2 "); + + if (dpy->ClientAPIs & EGL_OPENGL_ES3_BIT_KHR) + strcat(dpy->ClientAPIsString, "OpenGL_ES3 "); + + if (dpy->ClientAPIs & EGL_OPENVG_BIT) + strcat(dpy->ClientAPIsString, "OpenVG "); + + assert(strlen(dpy->ClientAPIsString) < sizeof(dpy->ClientAPIsString)); +} + + +/** * This is typically the second EGL function that an application calls. * Here we load/initialize the actual hardware driver. */ @@ -375,6 +483,12 @@ * EGL_KHR_get_all_proc_addresses also. 
*/ disp->Extensions.KHR_get_all_proc_addresses = EGL_TRUE; + + _eglCreateExtensionsString(disp); + _eglCreateAPIsString(disp); + _eglsnprintf(disp->VersionString, sizeof(disp->VersionString), + "%d.%d (%s)", disp->VersionMajor, disp->VersionMinor, + disp->Driver->Name); } /* Update applications version of major and minor if not NULL */ @@ -412,7 +526,6 @@ { _EGLDisplay *disp; _EGLDriver *drv; - const char *ret; if (dpy == EGL_NO_DISPLAY && name == EGL_EXTENSIONS) { RETURN_EGL_SUCCESS(NULL, _eglGlobal.ClientExtensionString); @@ -420,9 +533,19 @@ disp = _eglLockDisplay(dpy); _EGL_CHECK_DISPLAY(disp, NULL, drv); - ret = drv->API.QueryString(drv, disp, name); - RETURN_EGL_EVAL(disp, ret); + switch (name) { + case EGL_VENDOR: + RETURN_EGL_SUCCESS(disp, _EGL_VENDOR_STRING); + case EGL_VERSION: + RETURN_EGL_SUCCESS(disp, disp->VersionString); + case EGL_EXTENSIONS: + RETURN_EGL_SUCCESS(disp, disp->ExtensionsString); + case EGL_CLIENT_APIS: + RETURN_EGL_SUCCESS(disp, disp->ClientAPIsString); + default: + RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, NULL); + } } @@ -896,7 +1019,7 @@ RETURN_EGL_SUCCESS(NULL, EGL_TRUE); disp = ctx->Resource.Display; - _eglLockMutex(&disp->Mutex); + mtx_lock(&disp->Mutex); /* let bad current context imply bad current surface */ if (_eglGetContextHandle(ctx) == EGL_NO_CONTEXT || @@ -942,7 +1065,7 @@ RETURN_EGL_SUCCESS(NULL, EGL_TRUE); disp = ctx->Resource.Display; - _eglLockMutex(&disp->Mutex); + mtx_lock(&disp->Mutex); /* let bad current context imply bad current surface */ if (_eglGetContextHandle(ctx) == EGL_NO_CONTEXT || @@ -1457,10 +1580,10 @@ t->CurrentAPIIndex = i; - _eglLockMutex(&disp->Mutex); + mtx_lock(&disp->Mutex); drv = disp->Driver; (void) drv->API.MakeCurrent(drv, disp, NULL, NULL, NULL); - _eglUnlockMutex(&disp->Mutex); + mtx_unlock(&disp->Mutex); } } diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/eglapi.h mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglapi.h --- mesa-10.6.0~git20150310.5750595c/src/egl/main/eglapi.h 
2014-08-12 16:09:46.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglapi.h 2015-03-18 08:43:34.000000000 +0000 @@ -71,7 +71,6 @@ typedef EGLBoolean (*CopyBuffers_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, void *native_pixmap_target); /* misc funcs */ -typedef const char *(*QueryString_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLint name); typedef EGLBoolean (*WaitClient_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx); typedef EGLBoolean (*WaitNative_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLint engine); @@ -170,7 +169,6 @@ SwapBuffers_t SwapBuffers; CopyBuffers_t CopyBuffers; - QueryString_t QueryString; WaitClient_t WaitClient; WaitNative_t WaitNative; GetProcAddress_t GetProcAddress; diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/eglcurrent.c mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglcurrent.c --- mesa-10.6.0~git20150310.5750595c/src/egl/main/eglcurrent.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglcurrent.c 2015-03-18 08:43:34.000000000 +0000 @@ -29,9 +29,9 @@ #include #include #include "c99_compat.h" +#include "c11/threads.h" #include "egllog.h" -#include "eglmutex.h" #include "eglcurrent.h" #include "eglglobals.h" @@ -42,14 +42,9 @@ /* a fallback thread info to guarantee that every thread always has one */ static _EGLThreadInfo dummy_thread = _EGL_THREAD_INFO_INITIALIZER; - - -#if HAVE_PTHREAD -#include - -static _EGLMutex _egl_TSDMutex = _EGL_MUTEX_INITIALIZER; +static mtx_t _egl_TSDMutex = _MTX_INITIALIZER_NP; static EGLBoolean _egl_TSDInitialized; -static pthread_key_t _egl_TSD; +static tss_t _egl_TSD; static void (*_egl_FreeTSD)(_EGLThreadInfo *); #ifdef GLX_USE_TLS @@ -59,7 +54,7 @@ static inline void _eglSetTSD(const _EGLThreadInfo *t) { - pthread_setspecific(_egl_TSD, (const void *) t); + tss_set(_egl_TSD, (void *) t); #ifdef GLX_USE_TLS _egl_TLS = t; #endif @@ -70,33 +65,33 @@ #ifdef GLX_USE_TLS return (_EGLThreadInfo *) _egl_TLS; #else - return 
(_EGLThreadInfo *) pthread_getspecific(_egl_TSD); + return (_EGLThreadInfo *) tss_get(_egl_TSD); #endif } static inline void _eglFiniTSD(void) { - _eglLockMutex(&_egl_TSDMutex); + mtx_lock(&_egl_TSDMutex); if (_egl_TSDInitialized) { _EGLThreadInfo *t = _eglGetTSD(); _egl_TSDInitialized = EGL_FALSE; if (t && _egl_FreeTSD) _egl_FreeTSD((void *) t); - pthread_key_delete(_egl_TSD); + tss_delete(_egl_TSD); } - _eglUnlockMutex(&_egl_TSDMutex); + mtx_unlock(&_egl_TSDMutex); } static inline EGLBoolean _eglInitTSD(void (*dtor)(_EGLThreadInfo *)) { if (!_egl_TSDInitialized) { - _eglLockMutex(&_egl_TSDMutex); + mtx_lock(&_egl_TSDMutex); /* check again after acquiring lock */ if (!_egl_TSDInitialized) { - if (pthread_key_create(&_egl_TSD, (void (*)(void *)) dtor) != 0) { - _eglUnlockMutex(&_egl_TSDMutex); + if (tss_create(&_egl_TSD, (void (*)(void *)) dtor) != thrd_success) { + mtx_unlock(&_egl_TSDMutex); return EGL_FALSE; } _egl_FreeTSD = dtor; @@ -104,44 +99,12 @@ _egl_TSDInitialized = EGL_TRUE; } - _eglUnlockMutex(&_egl_TSDMutex); + mtx_unlock(&_egl_TSDMutex); } return EGL_TRUE; } -#else /* HAVE_PTHREAD */ -static const _EGLThreadInfo *_egl_TSD; -static void (*_egl_FreeTSD)(_EGLThreadInfo *); - -static inline void _eglSetTSD(const _EGLThreadInfo *t) -{ - _egl_TSD = t; -} - -static inline _EGLThreadInfo *_eglGetTSD(void) -{ - return (_EGLThreadInfo *) _egl_TSD; -} - -static inline void _eglFiniTSD(void) -{ - if (_egl_FreeTSD && _egl_TSD) - _egl_FreeTSD((_EGLThreadInfo *) _egl_TSD); -} - -static inline EGLBoolean _eglInitTSD(void (*dtor)(_EGLThreadInfo *)) -{ - if (!_egl_FreeTSD && dtor) { - _egl_FreeTSD = dtor; - _eglAddAtExitCall(_eglFiniTSD); - } - return EGL_TRUE; -} - -#endif /* !HAVE_PTHREAD */ - - static void _eglInitThreadInfo(_EGLThreadInfo *t) { diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/egldisplay.c mesa-10.6.0~git20150318.27bf37ba/src/egl/main/egldisplay.c --- mesa-10.6.0~git20150310.5750595c/src/egl/main/egldisplay.c 2015-02-25 15:10:41.000000000 
+0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/egldisplay.c 2015-03-18 08:43:34.000000000 +0000 @@ -35,13 +35,14 @@ #include #include #include +#include "c11/threads.h" + #include "eglcontext.h" #include "eglcurrent.h" #include "eglsurface.h" #include "egldisplay.h" #include "egldriver.h" #include "eglglobals.h" -#include "eglmutex.h" #include "egllog.h" /* Includes for _eglNativePlatformDetectNativeDisplay */ @@ -260,7 +261,7 @@ if (plat == _EGL_INVALID_PLATFORM) return NULL; - _eglLockMutex(_eglGlobal.Mutex); + mtx_lock(_eglGlobal.Mutex); /* search the display list first */ dpy = _eglGlobal.DisplayList; @@ -274,7 +275,7 @@ if (!dpy) { dpy = calloc(1, sizeof(_EGLDisplay)); if (dpy) { - _eglInitMutex(&dpy->Mutex); + mtx_init(&dpy->Mutex, mtx_plain); dpy->Platform = plat; dpy->PlatformDisplay = plat_dpy; @@ -284,7 +285,7 @@ } } - _eglUnlockMutex(_eglGlobal.Mutex); + mtx_unlock(_eglGlobal.Mutex); return dpy; } @@ -344,14 +345,14 @@ { _EGLDisplay *cur; - _eglLockMutex(_eglGlobal.Mutex); + mtx_lock(_eglGlobal.Mutex); cur = _eglGlobal.DisplayList; while (cur) { if (cur == (_EGLDisplay *) dpy) break; cur = cur->Next; } - _eglUnlockMutex(_eglGlobal.Mutex); + mtx_unlock(_eglGlobal.Mutex); return (cur != NULL); } diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/egldisplay.h mesa-10.6.0~git20150318.27bf37ba/src/egl/main/egldisplay.h --- mesa-10.6.0~git20150310.5750595c/src/egl/main/egldisplay.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/egldisplay.h 2015-03-18 08:43:34.000000000 +0000 @@ -32,10 +32,10 @@ #define EGLDISPLAY_INCLUDED #include "c99_compat.h" +#include "c11/threads.h" #include "egltypedefs.h" #include "egldefines.h" -#include "eglmutex.h" #include "eglarray.h" @@ -132,7 +132,7 @@ /* used to link displays */ _EGLDisplay *Next; - _EGLMutex Mutex; + mtx_t Mutex; _EGLPlatformType Platform; /**< The type of the platform display */ void *PlatformDisplay; /**< A pointer to the platform display */ diff -Nru 
mesa-10.6.0~git20150310.5750595c/src/egl/main/egldriver.c mesa-10.6.0~git20150318.27bf37ba/src/egl/main/egldriver.c --- mesa-10.6.0~git20150310.5750595c/src/egl/main/egldriver.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/egldriver.c 2015-03-18 08:43:34.000000000 +0000 @@ -37,13 +37,13 @@ #include #include #include +#include "c11/threads.h" #include "eglstring.h" #include "egldefines.h" #include "egldisplay.h" #include "egldriver.h" #include "egllog.h" -#include "eglmutex.h" #if defined(_EGL_OS_UNIX) #include @@ -63,7 +63,7 @@ _EGLDriver *Driver; } _EGLModule; -static _EGLMutex _eglModuleMutex = _EGL_MUTEX_INITIALIZER; +static mtx_t _eglModuleMutex = _MTX_INITIALIZER_NP; static _EGLArray *_eglModules; const struct { @@ -616,7 +616,7 @@ assert(!dpy->Initialized); - _eglLockMutex(&_eglModuleMutex); + mtx_lock(&_eglModuleMutex); /* set options */ dpy->Options.TestOnly = test_only; @@ -628,7 +628,7 @@ best_drv = _eglMatchAndInitialize(dpy); } - _eglUnlockMutex(&_eglModuleMutex); + mtx_unlock(&_eglModuleMutex); if (best_drv) { _eglLog(_EGL_DEBUG, "the best driver is %s%s", diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/eglfallbacks.c mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglfallbacks.c --- mesa-10.6.0~git20150310.5750595c/src/egl/main/eglfallbacks.c 2012-01-02 08:23:27.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglfallbacks.c 2015-03-18 08:43:34.000000000 +0000 @@ -32,7 +32,6 @@ #include "eglconfig.h" #include "eglcontext.h" #include "eglsurface.h" -#include "eglmisc.h" #include "eglscreen.h" #include "eglmode.h" #include "eglsync.h" @@ -85,7 +84,6 @@ drv->API.WaitClient = (WaitClient_t) _eglReturnFalse; drv->API.WaitNative = (WaitNative_t) _eglReturnFalse; drv->API.GetProcAddress = (GetProcAddress_t) _eglReturnFalse; - drv->API.QueryString = _eglQueryString; #ifdef EGL_MESA_screen_surface drv->API.CopyContextMESA = (CopyContextMESA_t) _eglReturnFalse; diff -Nru 
mesa-10.6.0~git20150310.5750595c/src/egl/main/eglglobals.c mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglglobals.c --- mesa-10.6.0~git20150310.5750595c/src/egl/main/eglglobals.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglglobals.c 2015-03-18 08:43:34.000000000 +0000 @@ -30,13 +30,14 @@ #include #include +#include "c11/threads.h" + #include "eglglobals.h" #include "egldisplay.h" #include "egldriver.h" -#include "eglmutex.h" -static _EGLMutex _eglGlobalMutex = _EGL_MUTEX_INITIALIZER; +static mtx_t _eglGlobalMutex = _MTX_INITIALIZER_NP; struct _egl_global _eglGlobal = { @@ -84,7 +85,7 @@ if (func) { static EGLBoolean registered = EGL_FALSE; - _eglLockMutex(_eglGlobal.Mutex); + mtx_lock(_eglGlobal.Mutex); if (!registered) { atexit(_eglAtExit); @@ -94,6 +95,6 @@ assert(_eglGlobal.NumAtExitCalls < ARRAY_SIZE(_eglGlobal.AtExitCalls)); _eglGlobal.AtExitCalls[_eglGlobal.NumAtExitCalls++] = func; - _eglUnlockMutex(_eglGlobal.Mutex); + mtx_unlock(_eglGlobal.Mutex); } } diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/eglglobals.h mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglglobals.h --- mesa-10.6.0~git20150310.5750595c/src/egl/main/eglglobals.h 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglglobals.h 2015-03-18 08:43:34.000000000 +0000 @@ -32,9 +32,9 @@ #define EGLGLOBALS_INCLUDED #include +#include "c11/threads.h" #include "egltypedefs.h" -#include "eglmutex.h" /** @@ -42,7 +42,7 @@ */ struct _egl_global { - _EGLMutex *Mutex; + mtx_t *Mutex; /* the list of all displays */ _EGLDisplay *DisplayList; diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/egllog.c mesa-10.6.0~git20150318.27bf37ba/src/egl/main/egllog.c --- mesa-10.6.0~git20150310.5750595c/src/egl/main/egllog.c 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/egllog.c 2015-03-18 08:43:34.000000000 +0000 @@ -38,24 +38,24 @@ #include #include #include +#include "c11/threads.h" 
#include "egllog.h" #include "eglstring.h" -#include "eglmutex.h" #define MAXSTRING 1000 #define FALLBACK_LOG_LEVEL _EGL_WARNING static struct { - _EGLMutex mutex; + mtx_t mutex; EGLBoolean initialized; EGLint level; _EGLLogProc logger; EGLint num_messages; } logging = { - _EGL_MUTEX_INITIALIZER, + _MTX_INITIALIZER_NP, EGL_FALSE, FALLBACK_LOG_LEVEL, NULL, @@ -82,7 +82,7 @@ { EGLint num_messages = 0; - _eglLockMutex(&logging.mutex); + mtx_lock(&logging.mutex); if (logging.logger != logger) { logging.logger = logger; @@ -91,7 +91,7 @@ logging.num_messages = 0; } - _eglUnlockMutex(&logging.mutex); + mtx_unlock(&logging.mutex); if (num_messages) _eglLog(_EGL_DEBUG, @@ -111,9 +111,9 @@ case _EGL_WARNING: case _EGL_INFO: case _EGL_DEBUG: - _eglLockMutex(&logging.mutex); + mtx_lock(&logging.mutex); logging.level = level; - _eglUnlockMutex(&logging.mutex); + mtx_unlock(&logging.mutex); break; default: break; @@ -188,7 +188,7 @@ if (level > logging.level || level < 0) return; - _eglLockMutex(&logging.mutex); + mtx_lock(&logging.mutex); if (logging.logger) { va_start(args, fmtStr); @@ -201,7 +201,7 @@ logging.num_messages++; } - _eglUnlockMutex(&logging.mutex); + mtx_unlock(&logging.mutex); if (level == _EGL_FATAL) exit(1); /* or abort()? */ diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/eglmisc.c mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglmisc.c --- mesa-10.6.0~git20150310.5750595c/src/egl/main/eglmisc.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglmisc.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,183 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * Copyright 2009-2010 Chia-I Wu - * Copyright 2010-2011 LunarG, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Small/misc EGL functions - */ - - -#include -#include -#include "eglcurrent.h" -#include "eglmisc.h" -#include "egldisplay.h" -#include "egldriver.h" -#include "eglstring.h" - - -/** - * Copy the extension into the string and update the string pointer. - */ -static EGLint -_eglAppendExtension(char **str, const char *ext) -{ - char *s = *str; - size_t len = strlen(ext); - - if (s) { - memcpy(s, ext, len); - s[len++] = ' '; - s[len] = '\0'; - - *str += len; - } - else { - len++; - } - - return (EGLint) len; -} - - -/** - * Examine the individual extension enable/disable flags and recompute - * the driver's Extensions string. 
- */ -static void -_eglUpdateExtensionsString(_EGLDisplay *dpy) -{ -#define _EGL_CHECK_EXTENSION(ext) \ - do { \ - if (dpy->Extensions.ext) { \ - _eglAppendExtension(&exts, "EGL_" #ext); \ - assert(exts <= dpy->ExtensionsString + _EGL_MAX_EXTENSIONS_LEN); \ - } \ - } while (0) - - char *exts = dpy->ExtensionsString; - - if (exts[0]) - return; - - _EGL_CHECK_EXTENSION(MESA_screen_surface); - _EGL_CHECK_EXTENSION(MESA_copy_context); - _EGL_CHECK_EXTENSION(MESA_drm_display); - _EGL_CHECK_EXTENSION(MESA_drm_image); - _EGL_CHECK_EXTENSION(MESA_configless_context); - - _EGL_CHECK_EXTENSION(WL_bind_wayland_display); - _EGL_CHECK_EXTENSION(WL_create_wayland_buffer_from_image); - - _EGL_CHECK_EXTENSION(KHR_image_base); - _EGL_CHECK_EXTENSION(KHR_image_pixmap); - if (dpy->Extensions.KHR_image_base && dpy->Extensions.KHR_image_pixmap) - _eglAppendExtension(&exts, "EGL_KHR_image"); - - _EGL_CHECK_EXTENSION(KHR_vg_parent_image); - _EGL_CHECK_EXTENSION(KHR_get_all_proc_addresses); - _EGL_CHECK_EXTENSION(KHR_gl_texture_2D_image); - _EGL_CHECK_EXTENSION(KHR_gl_texture_cubemap_image); - _EGL_CHECK_EXTENSION(KHR_gl_texture_3D_image); - _EGL_CHECK_EXTENSION(KHR_gl_renderbuffer_image); - - _EGL_CHECK_EXTENSION(KHR_reusable_sync); - _EGL_CHECK_EXTENSION(KHR_fence_sync); - - _EGL_CHECK_EXTENSION(KHR_surfaceless_context); - _EGL_CHECK_EXTENSION(KHR_create_context); - - _EGL_CHECK_EXTENSION(NOK_swap_region); - _EGL_CHECK_EXTENSION(NOK_texture_from_pixmap); - - _EGL_CHECK_EXTENSION(ANDROID_image_native_buffer); - - _EGL_CHECK_EXTENSION(CHROMIUM_sync_control); - - _EGL_CHECK_EXTENSION(EXT_create_context_robustness); - _EGL_CHECK_EXTENSION(EXT_buffer_age); - _EGL_CHECK_EXTENSION(EXT_swap_buffers_with_damage); - _EGL_CHECK_EXTENSION(EXT_image_dma_buf_import); - - _EGL_CHECK_EXTENSION(NV_post_sub_buffer); -#undef _EGL_CHECK_EXTENSION -} - - -static void -_eglUpdateAPIsString(_EGLDisplay *dpy) -{ - char *apis = dpy->ClientAPIsString; - - if (apis[0] || !dpy->ClientAPIs) - return; - - if 
(dpy->ClientAPIs & EGL_OPENGL_BIT) - strcat(apis, "OpenGL "); - - if (dpy->ClientAPIs & EGL_OPENGL_ES_BIT) - strcat(apis, "OpenGL_ES "); - - if (dpy->ClientAPIs & EGL_OPENGL_ES2_BIT) - strcat(apis, "OpenGL_ES2 "); - - if (dpy->ClientAPIs & EGL_OPENGL_ES3_BIT_KHR) - strcat(apis, "OpenGL_ES3 "); - - if (dpy->ClientAPIs & EGL_OPENVG_BIT) - strcat(apis, "OpenVG "); - - assert(strlen(apis) < sizeof(dpy->ClientAPIsString)); -} - - -const char * -_eglQueryString(_EGLDriver *drv, _EGLDisplay *dpy, EGLint name) -{ - (void) drv; - - switch (name) { - case EGL_VENDOR: - return _EGL_VENDOR_STRING; - case EGL_VERSION: - _eglsnprintf(dpy->VersionString, sizeof(dpy->VersionString), - "%d.%d (%s)", dpy->VersionMajor, dpy->VersionMinor, - dpy->Driver->Name); - return dpy->VersionString; - case EGL_EXTENSIONS: - _eglUpdateExtensionsString(dpy); - return dpy->ExtensionsString; - case EGL_CLIENT_APIS: - _eglUpdateAPIsString(dpy); - return dpy->ClientAPIsString; - default: - _eglError(EGL_BAD_PARAMETER, "eglQueryString"); - return NULL; - } -} diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/eglmisc.h mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglmisc.h --- mesa-10.6.0~git20150310.5750595c/src/egl/main/eglmisc.h 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglmisc.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * Copyright 2009-2010 Chia-I Wu - * Copyright 2010-2011 LunarG, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef EGLMISC_INCLUDED -#define EGLMISC_INCLUDED - - -#include "egltypedefs.h" - - -extern const char * -_eglQueryString(_EGLDriver *drv, _EGLDisplay *dpy, EGLint name); - - -#endif /* EGLMISC_INCLUDED */ diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/eglmutex.h mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglmutex.h --- mesa-10.6.0~git20150310.5750595c/src/egl/main/eglmutex.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglmutex.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,66 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 Chia-I Wu - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef EGLMUTEX_INCLUDED -#define EGLMUTEX_INCLUDED - -#include "c99_compat.h" - -#include "eglcompiler.h" - -#include "c11/threads.h" - -typedef mtx_t _EGLMutex; - -static inline void _eglInitMutex(_EGLMutex *m) -{ - mtx_init(m, mtx_plain); -} - -static inline void -_eglDestroyMutex(_EGLMutex *m) -{ - mtx_destroy(m); -} - -static inline void -_eglLockMutex(_EGLMutex *m) -{ - mtx_lock(m); -} - -static inline void -_eglUnlockMutex(_EGLMutex *m) -{ - mtx_unlock(m); -} - -#define _EGL_MUTEX_INITIALIZER _MTX_INITIALIZER_NP - - -#endif /* EGLMUTEX_INCLUDED */ diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/eglscreen.c mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglscreen.c --- mesa-10.6.0~git20150310.5750595c/src/egl/main/eglscreen.c 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/eglscreen.c 2015-03-18 08:43:34.000000000 +0000 @@ -44,20 +44,20 @@ #include #include #include +#include "c11/threads.h" #include "egldisplay.h" #include "eglcurrent.h" #include "eglmode.h" #include "eglsurface.h" #include "eglscreen.h" -#include "eglmutex.h" #ifdef EGL_MESA_screen_surface /* ugh, no atomic op? 
*/ -static _EGLMutex _eglNextScreenHandleMutex = _EGL_MUTEX_INITIALIZER; +static mtx_t _eglNextScreenHandleMutex = _MTX_INITIALIZER_NP; static EGLScreenMESA _eglNextScreenHandle = 1; @@ -70,10 +70,10 @@ { EGLScreenMESA s; - _eglLockMutex(&_eglNextScreenHandleMutex); + mtx_lock(&_eglNextScreenHandleMutex); s = _eglNextScreenHandle; _eglNextScreenHandle += _EGL_SCREEN_MAX_MODES; - _eglUnlockMutex(&_eglNextScreenHandleMutex); + mtx_unlock(&_eglNextScreenHandleMutex); return s; } diff -Nru mesa-10.6.0~git20150310.5750595c/src/egl/main/Makefile.sources mesa-10.6.0~git20150318.27bf37ba/src/egl/main/Makefile.sources --- mesa-10.6.0~git20150310.5750595c/src/egl/main/Makefile.sources 2014-09-10 05:44:12.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/egl/main/Makefile.sources 2015-03-18 08:43:34.000000000 +0000 @@ -22,11 +22,8 @@ eglimage.h \ egllog.c \ egllog.h \ - eglmisc.c \ - eglmisc.h \ eglmode.c \ eglmode.h \ - eglmutex.h \ eglscreen.c \ eglscreen.h \ eglstring.c \ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp --- mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 2015-01-14 13:02:07.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 2015-03-18 08:43:34.000000000 +0000 @@ -399,19 +399,23 @@ break; } } + + if (pc >= extent) { + Out << "disassembly larger than " << extent << "bytes, aborting\n"; + break; + } } + Out << "\n"; + Out.flush(); + /* * Print GDB command, useful to verify output. 
*/ - if (0) { _debug_printf("disassemble %p %p\n", bytes, bytes + pc); } - Out << "\n"; - Out.flush(); - return pc; } diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/gallivm/lp_bld_limits.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/gallivm/lp_bld_limits.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/gallivm/lp_bld_limits.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/gallivm/lp_bld_limits.h 2015-03-18 08:43:34.000000000 +0000 @@ -129,6 +129,7 @@ case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; } /* if we get here, we missed a shader cap above (and should have seen diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp --- mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 2015-03-18 08:43:34.000000000 +0000 @@ -502,6 +502,7 @@ #if HAVE_LLVM >= 0x0306 builder.setMCJITMemoryManager(std::unique_ptr(MM)); + MM = NULL; // ownership taken by std::unique_ptr #else builder.setMCJITMemoryManager(MM); #endif diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/os/os_mman.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/os/os_mman.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/os/os_mman.h 2015-01-14 13:02:07.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/os/os_mman.h 2015-03-18 08:43:34.000000000 +0000 @@ -70,8 +70,8 @@ return __mmap2(addr, length, prot, flags, fd, (size_t) (offset >> 12)); } -# define drm_munmap(addr, length) \ - munmap(addr, length) +# define os_munmap(addr, length) \ + munmap(addr, length) #else /* assume large file 
support exists */ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/tgsi/tgsi_exec.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/tgsi/tgsi_exec.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/tgsi/tgsi_exec.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/tgsi/tgsi_exec.h 2015-03-18 08:43:34.000000000 +0000 @@ -459,6 +459,7 @@ case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: return 1; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; } /* if we get here, we missed a shader cap above (and should have seen diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/tgsi/tgsi_info.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/tgsi/tgsi_info.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/tgsi/tgsi_info.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/tgsi/tgsi_info.c 2015-03-18 08:43:34.000000000 +0000 @@ -56,7 +56,7 @@ { 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, - { 0, 0, 0, 0, 0, 0, NONE, "", 19 }, /* removed */ + { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, { 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */ @@ -155,7 +155,7 @@ { 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, { 0, 1, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF }, { 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END }, - { 0, 0, 0, 0, 0, 0, NONE, "", 118 }, /* removed */ + { 1, 3, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA }, { 1, 1, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I }, { 1, 2, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV }, { 1, 2, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX }, @@ -341,6 +341,12 @@ case TGSI_OPCODE_USNE: case TGSI_OPCODE_SVIEWINFO: case 
TGSI_OPCODE_UMUL_HI: + case TGSI_OPCODE_UBFE: + case TGSI_OPCODE_BFI: + case TGSI_OPCODE_BREV: + case TGSI_OPCODE_POPC: + case TGSI_OPCODE_LSB: + case TGSI_OPCODE_UMSB: return TGSI_TYPE_UNSIGNED; case TGSI_OPCODE_ARL: case TGSI_OPCODE_ARR: @@ -362,6 +368,8 @@ case TGSI_OPCODE_IABS: case TGSI_OPCODE_ISSG: case TGSI_OPCODE_IMUL_HI: + case TGSI_OPCODE_IBFE: + case TGSI_OPCODE_IMSB: return TGSI_TYPE_SIGNED; default: return TGSI_TYPE_FLOAT; diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/tgsi/tgsi_util.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/tgsi/tgsi_util.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/tgsi/tgsi_util.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/tgsi/tgsi_util.c 2015-03-18 08:43:34.000000000 +0000 @@ -193,6 +193,7 @@ case TGSI_OPCODE_MAD: case TGSI_OPCODE_SUB: case TGSI_OPCODE_LRP: + case TGSI_OPCODE_FMA: case TGSI_OPCODE_FRC: case TGSI_OPCODE_CEIL: case TGSI_OPCODE_CLAMP: diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/util/u_slab.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/util/u_slab.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/auxiliary/util/u_slab.h 2013-05-08 13:13:18.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/auxiliary/util/u_slab.h 2015-03-18 08:43:34.000000000 +0000 @@ -81,7 +81,16 @@ void util_slab_set_thread_safety(struct util_slab_mempool *pool, enum util_slab_threading threading); -#define util_slab_alloc(pool) (pool)->alloc(pool) -#define util_slab_free(pool, ptr) (pool)->free(pool, ptr) +static inline void * +util_slab_alloc(struct util_slab_mempool *pool) +{ + return pool->alloc(pool); +} + +static inline void +util_slab_free(struct util_slab_mempool *pool, void *ptr) +{ + pool->free(pool, ptr); +} #endif diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/docs/source/screen.rst mesa-10.6.0~git20150318.27bf37ba/src/gallium/docs/source/screen.rst --- 
mesa-10.6.0~git20150310.5750595c/src/gallium/docs/source/screen.rst 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/docs/source/screen.rst 2015-03-18 08:43:34.000000000 +0000 @@ -336,6 +336,8 @@ is supported. If it is, DTRUNC/DCEIL/DFLR/DROUND opcodes may be used. * ``PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED``: Whether DFRACEXP and DLDEXP are supported. +* ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether FMA and DFMA (doubles only) + are supported. .. _pipe_compute_cap: diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/docs/source/tgsi.rst mesa-10.6.0~git20150318.27bf37ba/src/gallium/docs/source/tgsi.rst --- mesa-10.6.0~git20150310.5750595c/src/gallium/docs/source/tgsi.rst 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/docs/source/tgsi.rst 2015-03-18 08:43:34.000000000 +0000 @@ -272,6 +272,21 @@ dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w +.. opcode:: FMA - Fused Multiply-Add + +Perform a * b + c with no intermediate rounding step. + +.. math:: + + dst.x = src0.x \times src1.x + src2.x + + dst.y = src0.y \times src1.y + src2.y + + dst.z = src0.z \times src1.z + src2.z + + dst.w = src0.w \times src1.w + src2.w + + .. opcode:: DP2A - 2-component Dot Product And Add .. math:: @@ -1957,6 +1972,17 @@ .. math:: + dst.xy = src0.xy \times src1.xy + src2.xy + + dst.zw = src0.zw \times src1.zw + src2.zw + + +.. opcode:: DFMA - Fused Multiply-Add + +Perform a * b + c with no intermediate rounding step. + +.. 
math:: + dst.xy = src0.xy \times src1.xy + src2.xy dst.zw = src0.zw \times src1.zw + src2.zw diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h 2015-03-18 08:43:34.000000000 +0000 @@ -13,7 +13,7 @@ - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15085 bytes, from 2014-12-20 21:49:41) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64848 bytes, from 2015-02-20 18:21:24) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64771 bytes, from 2015-03-15 21:55:57) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 51942 bytes, from 2015-02-24 17:14:02) Copyright (C) 2013-2014 by the following authors: diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h 2015-03-18 08:43:34.000000000 +0000 @@ -13,7 +13,7 @@ - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15085 bytes, from 2014-12-20 21:49:41) -- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64848 bytes, from 2015-02-20 18:21:24) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64771 bytes, from 2015-03-15 21:55:57) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 51942 bytes, from 2015-02-24 17:14:02) Copyright (C) 2013-2015 by the following authors: @@ -2680,7 +2680,7 @@ } #define REG_A3XX_TEX_CONST_3 0x00000003 -#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x0000000f +#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x00001fff #define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT 0 static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val) { diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h 2015-03-18 08:43:34.000000000 +0000 @@ -13,7 +13,7 @@ - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15085 bytes, from 2014-12-20 21:49:41) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64848 bytes, from 2015-02-20 18:21:24) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64771 bytes, from 2015-03-15 21:55:57) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 51942 bytes, from 2015-02-24 17:14:02) Copyright (C) 2013-2015 by the following authors: diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/adreno_common.xml.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/adreno_common.xml.h --- 
mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/adreno_common.xml.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/adreno_common.xml.h 2015-03-18 08:43:34.000000000 +0000 @@ -13,7 +13,7 @@ - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15085 bytes, from 2014-12-20 21:49:41) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64848 bytes, from 2015-02-20 18:21:24) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64771 bytes, from 2015-03-15 21:55:57) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 51942 bytes, from 2015-02-24 17:14:02) Copyright (C) 2013-2014 by the following authors: diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/adreno_pm4.xml.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/adreno_pm4.xml.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/adreno_pm4.xml.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/adreno_pm4.xml.h 2015-03-18 08:43:34.000000000 +0000 @@ -13,7 +13,7 @@ - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15085 bytes, from 2014-12-20 21:49:41) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64848 bytes, from 2015-02-20 18:21:24) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64771 bytes, from 2015-03-15 21:55:57) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 51942 bytes, from 
2015-02-24 17:14:02) Copyright (C) 2013-2014 by the following authors: diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/freedreno_resource.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/freedreno_resource.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/freedreno_resource.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/freedreno_resource.c 2015-03-18 08:43:34.000000000 +0000 @@ -213,9 +213,16 @@ for (level = 0; level <= prsc->last_level; level++) { struct fd_resource_slice *slice = fd_resource_slice(rsc, level); - slice->pitch = align(width, 32); + slice->pitch = width = align(width, 32); slice->offset = size; - slice->size0 = align(slice->pitch * height * rsc->cpp, alignment); + /* 1d array, 2d array, 3d textures (but not cube!) must all have the + * same layer size for each miplevel on a3xx. These are also the + * targets that have non-1 alignment. + */ + if (level == 0 || layers_in_level == 1 || alignment == 1) + slice->size0 = align(slice->pitch * height * rsc->cpp, alignment); + else + slice->size0 = rsc->slices[0].size0; size += slice->size0 * depth * layers_in_level; diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/freedreno_screen.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/freedreno_screen.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/freedreno_screen.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/freedreno_screen.c 2015-03-18 08:43:34.000000000 +0000 @@ -67,7 +67,6 @@ {"nobypass", FD_DBG_NOBYPASS, "Disable GMEM bypass"}, {"fraghalf", FD_DBG_FRAGHALF, "Use half-precision in fragment shader"}, {"nobin", FD_DBG_NOBIN, "Disable hw binning"}, - {"noopt", FD_DBG_NOOPT , "Disable optimization passes in compiler"}, {"optmsgs", FD_DBG_OPTMSGS,"Enable optimizater debug messages"}, {"optdump", FD_DBG_OPTDUMP,"Dump shader DAG 
to .dot files"}, {"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 120 (rather than 130) on a3xx+"}, @@ -364,6 +363,7 @@ case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/freedreno_util.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/freedreno_util.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/freedreno_util.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/freedreno_util.h 2015-03-18 08:43:34.000000000 +0000 @@ -62,7 +62,6 @@ #define FD_DBG_NOBYPASS 0x0040 #define FD_DBG_FRAGHALF 0x0080 #define FD_DBG_NOBIN 0x0100 -#define FD_DBG_NOOPT 0x0200 #define FD_DBG_OPTMSGS 0x0400 #define FD_DBG_OPTDUMP 0x0800 #define FD_DBG_GLSL120 0x1000 diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3.c 2015-03-18 08:43:34.000000000 +0000 @@ -588,21 +588,10 @@ static uint32_t serialno = 0; instr->serialno = ++serialno; #endif - if (shader->instrs_count == shader->instrs_sz) { - shader->instrs_sz = MAX2(2 * shader->instrs_sz, 16); - shader->instrs = realloc(shader->instrs, - shader->instrs_sz * sizeof(shader->instrs[0])); - } - shader->instrs[shader->instrs_count++] = instr; + array_insert(shader->instrs, instr); - if (is_input(instr)) { - if (shader->baryfs_count == shader->baryfs_sz) { - shader->baryfs_sz = MAX2(2 * shader->baryfs_sz, 16); - shader->baryfs = realloc(shader->baryfs, - shader->baryfs_sz * sizeof(shader->baryfs[0])); - } - 
shader->baryfs[shader->baryfs_count++] = instr; - } + if (is_input(instr)) + array_insert(shader->baryfs, instr); } struct ir3_block * ir3_block_create(struct ir3 *shader, diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 2015-03-18 08:43:34.000000000 +0000 @@ -355,27 +355,17 @@ break; } - if (!(fd_mesa_debug & FD_DBG_NOOPT)) { - /* with new compiler: */ - info = "new compiler"; - ret = ir3_compile_shader(&v, toks, key, true); - - if (ret) { - reset_variant(&v, "new compiler failed, trying without copy propagation!"); - info = "new compiler (no copy propagation)"; - ret = ir3_compile_shader(&v, toks, key, false); - if (ret) - reset_variant(&v, "new compiler failed, trying fallback!\n"); - } - } + info = "compiler"; + ret = ir3_compile_shader(&v, toks, key, true); if (ret) { - info = "old compiler"; - ret = ir3_compile_shader_old(&v, toks, key); + reset_variant(&v, "compiler failed, trying without copy propagation!"); + info = "compiler (no copy propagation)"; + ret = ir3_compile_shader(&v, toks, key, false); } if (ret) { - fprintf(stderr, "old compiler failed!\n"); + fprintf(stderr, "compiler failed!\n"); return ret; } dump_info(&v, info); diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_compiler.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_compiler.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_compiler.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_compiler.c 2015-03-18 08:43:34.000000000 +0000 @@ -771,6 +771,7 @@ compile_assert(ctx, ctx->block->address == instr->address); instr->address 
= ctx->block->address; + array_insert(ctx->ir->indirects, instr); } reg = ir3_reg_create(instr, regid(num, chan), flags); @@ -901,6 +902,7 @@ compile_assert(ctx, ctx->block->address == instr->address); instr->address = ctx->block->address; + array_insert(ctx->ir->indirects, instr); } reg = ir3_reg_create(instr, regid(num, chan), flags); diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_compiler.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_compiler.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_compiler.h 2015-01-14 13:02:07.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_compiler.h 2015-03-18 08:43:34.000000000 +0000 @@ -35,8 +35,5 @@ int ir3_compile_shader(struct ir3_shader_variant *so, const struct tgsi_token *tokens, struct ir3_shader_key key, bool cp); -int ir3_compile_shader_old(struct ir3_shader_variant *so, - const struct tgsi_token *tokens, - struct ir3_shader_key key); #endif /* FD3_COMPILER_H_ */ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_compiler_old.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_compiler_old.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_compiler_old.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_compiler_old.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1539 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons 
to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include - -#include "pipe/p_state.h" -#include "util/u_string.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "tgsi/tgsi_lowering.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_strings.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_scan.h" - -#include "freedreno_util.h" - -#include "ir3_compiler.h" -#include "ir3_shader.h" - -#include "instr-a3xx.h" -#include "ir3.h" - - -struct ir3_compile_context { - const struct tgsi_token *tokens; - bool free_tokens; - struct ir3 *ir; - struct ir3_block *block; - struct ir3_shader_variant *so; - - struct tgsi_parse_context parser; - unsigned type; - - struct tgsi_shader_info info; - - /* last input dst (for setting (ei) flag): */ - struct ir3_register *last_input; - - /* last instruction with relative addressing: */ - struct ir3_instruction *last_rel; - - /* for calculating input/output positions/linkages: */ - unsigned next_inloc; - - unsigned num_internal_temps; - struct tgsi_src_register internal_temps[6]; - - /* track registers which need to synchronize w/ "complex alu" cat3 - * instruction pipeline: - */ - regmask_t needs_ss; - - /* track registers which 
need to synchronize with texture fetch - * pipeline: - */ - regmask_t needs_sy; - - /* inputs start at r0, temporaries start after last input, and - * outputs start after last temporary. - * - * We could be more clever, because this is not a hw restriction, - * but probably best just to implement an optimizing pass to - * reduce the # of registers used and get rid of redundant mov's - * (to output register). - */ - unsigned base_reg[TGSI_FILE_COUNT]; - - /* idx/slot for last compiler generated immediate */ - unsigned immediate_idx; - - /* stack of branch instructions that start (potentially nested) - * branch instructions, so that we can fix up the branch targets - * so that we can fix up the branch target on the corresponding - * END instruction - */ - struct ir3_instruction *branch[16]; - unsigned int branch_count; - - /* used when dst is same as one of the src, to avoid overwriting a - * src element before the remaining scalar instructions that make - * up the vector operation - */ - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; -}; - - -static void vectorize(struct ir3_compile_context *ctx, - struct ir3_instruction *instr, struct tgsi_dst_register *dst, - int nsrcs, ...); -static void create_mov(struct ir3_compile_context *ctx, - struct tgsi_dst_register *dst, struct tgsi_src_register *src); - -static unsigned -compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, - const struct tgsi_token *tokens) -{ - unsigned ret, base = 0; - struct tgsi_shader_info *info = &ctx->info; - struct tgsi_lowering_config lconfig = { - .color_two_side = so->key.color_two_side, - .lower_DST = true, - .lower_XPD = true, - .lower_SCS = true, - .lower_LRP = true, - .lower_FRC = true, - .lower_POW = true, - .lower_LIT = true, - .lower_EXP = true, - .lower_LOG = true, - .lower_DP4 = true, - .lower_DP3 = true, - .lower_DPH = true, - .lower_DP2 = true, - .lower_DP2A = true, - }; - - switch (so->type) { - case SHADER_FRAGMENT: - case 
SHADER_COMPUTE: - lconfig.saturate_s = so->key.fsaturate_s; - lconfig.saturate_t = so->key.fsaturate_t; - lconfig.saturate_r = so->key.fsaturate_r; - break; - case SHADER_VERTEX: - lconfig.saturate_s = so->key.vsaturate_s; - lconfig.saturate_t = so->key.vsaturate_t; - lconfig.saturate_r = so->key.vsaturate_r; - break; - } - - ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info); - ctx->free_tokens = !!ctx->tokens; - if (!ctx->tokens) { - /* no lowering */ - ctx->tokens = tokens; - } - ctx->ir = so->ir; - ctx->block = ir3_block_create(ctx->ir, 0, 0, 0); - ctx->so = so; - ctx->last_input = NULL; - ctx->last_rel = NULL; - ctx->next_inloc = 8; - ctx->num_internal_temps = 0; - ctx->branch_count = 0; - - regmask_init(&ctx->needs_ss); - regmask_init(&ctx->needs_sy); - memset(ctx->base_reg, 0, sizeof(ctx->base_reg)); - - /* Immediates go after constants: */ - ctx->base_reg[TGSI_FILE_CONSTANT] = 0; - ctx->base_reg[TGSI_FILE_IMMEDIATE] = - info->file_max[TGSI_FILE_CONSTANT] + 1; - - /* if full precision and fragment shader, don't clobber - * r0.x w/ bary fetch: - */ - if ((so->type == SHADER_FRAGMENT) && !so->key.half_precision) - base = 1; - - /* Temporaries after outputs after inputs: */ - ctx->base_reg[TGSI_FILE_INPUT] = base; - ctx->base_reg[TGSI_FILE_OUTPUT] = base + - info->file_max[TGSI_FILE_INPUT] + 1; - ctx->base_reg[TGSI_FILE_TEMPORARY] = base + - info->file_max[TGSI_FILE_INPUT] + 1 + - info->file_max[TGSI_FILE_OUTPUT] + 1; - - so->first_driver_param = ~0; - so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE]; - ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); - - ret = tgsi_parse_init(&ctx->parser, ctx->tokens); - if (ret != TGSI_PARSE_OK) - return ret; - - ctx->type = ctx->parser.FullHeader.Processor.Processor; - - return ret; -} - -static void -compile_error(struct ir3_compile_context *ctx, const char *format, ...) 
-{ - va_list ap; - va_start(ap, format); - _debug_vprintf(format, ap); - va_end(ap); - tgsi_dump(ctx->tokens, 0); - debug_assert(0); -} - -#define compile_assert(ctx, cond) do { \ - if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \ - } while (0) - -static void -compile_free(struct ir3_compile_context *ctx) -{ - if (ctx->free_tokens) - free((void *)ctx->tokens); - tgsi_parse_free(&ctx->parser); -} - -struct instr_translater { - void (*fxn)(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst); - unsigned tgsi_opc; - opc_t opc; - opc_t hopc; /* opc to use for half_precision mode, if different */ - unsigned arg; -}; - -static void -handle_last_rel(struct ir3_compile_context *ctx) -{ - if (ctx->last_rel) { - ctx->last_rel->flags |= IR3_INSTR_UL; - ctx->last_rel = NULL; - } -} - -static struct ir3_instruction * -instr_create(struct ir3_compile_context *ctx, int category, opc_t opc) -{ - return ir3_instr_create(ctx->block, category, opc); -} - -static void -add_nop(struct ir3_compile_context *ctx, unsigned count) -{ - while (count-- > 0) - instr_create(ctx, 0, OPC_NOP); -} - -static unsigned -src_flags(struct ir3_compile_context *ctx, struct ir3_register *reg) -{ - unsigned flags = 0; - - if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)) - return flags; - - if (regmask_get(&ctx->needs_ss, reg)) { - flags |= IR3_INSTR_SS; - regmask_init(&ctx->needs_ss); - } - - if (regmask_get(&ctx->needs_sy, reg)) { - flags |= IR3_INSTR_SY; - regmask_init(&ctx->needs_sy); - } - - return flags; -} - -static struct ir3_register * -add_dst_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_dst_register *dst, unsigned chan) -{ - unsigned flags = 0, num = 0; - struct ir3_register *reg; - - switch (dst->File) { - case TGSI_FILE_OUTPUT: - case TGSI_FILE_TEMPORARY: - num = dst->Index + ctx->base_reg[dst->File]; - break; - case TGSI_FILE_ADDRESS: - num = REG_A0; - break; - default: - 
compile_error(ctx, "unsupported dst register file: %s\n", - tgsi_file_name(dst->File)); - break; - } - - if (dst->Indirect) - flags |= IR3_REG_RELATIV; - if (ctx->so->key.half_precision) - flags |= IR3_REG_HALF; - - reg = ir3_reg_create(instr, regid(num, chan), flags); - - if (dst->Indirect) - ctx->last_rel = instr; - - return reg; -} - -static struct ir3_register * -add_src_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_src_register *src, unsigned chan) -{ - unsigned flags = 0, num = 0; - struct ir3_register *reg; - - /* TODO we need to use a mov to temp for const >= 64.. or maybe - * we could use relative addressing.. - */ - compile_assert(ctx, src->Index < 64); - - switch (src->File) { - case TGSI_FILE_IMMEDIATE: - /* TODO if possible, use actual immediate instead of const.. but - * TGSI has vec4 immediates, we can only embed scalar (of limited - * size, depending on instruction..) - */ - case TGSI_FILE_CONSTANT: - flags |= IR3_REG_CONST; - num = src->Index + ctx->base_reg[src->File]; - break; - case TGSI_FILE_OUTPUT: - /* NOTE: we should only end up w/ OUTPUT file for things like - * clamp()'ing saturated dst instructions - */ - case TGSI_FILE_INPUT: - case TGSI_FILE_TEMPORARY: - num = src->Index + ctx->base_reg[src->File]; - break; - default: - compile_error(ctx, "unsupported src register file: %s\n", - tgsi_file_name(src->File)); - break; - } - - if (src->Absolute) - flags |= IR3_REG_ABS; - if (src->Negate) - flags |= IR3_REG_NEGATE; - if (src->Indirect) - flags |= IR3_REG_RELATIV; - if (ctx->so->key.half_precision) - flags |= IR3_REG_HALF; - - reg = ir3_reg_create(instr, regid(num, chan), flags); - - if (src->Indirect) - ctx->last_rel = instr; - - instr->flags |= src_flags(ctx, reg); - - return reg; -} - -static void -src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) -{ - src->File = dst->File; - src->Indirect = dst->Indirect; - src->Dimension = dst->Dimension; - src->Index = dst->Index; - 
src->Absolute = 0; - src->Negate = 0; - src->SwizzleX = TGSI_SWIZZLE_X; - src->SwizzleY = TGSI_SWIZZLE_Y; - src->SwizzleZ = TGSI_SWIZZLE_Z; - src->SwizzleW = TGSI_SWIZZLE_W; -} - -/* Get internal-temp src/dst to use for a sequence of instructions - * generated by a single TGSI op. - */ -static struct tgsi_src_register * -get_internal_temp(struct ir3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst) -{ - struct tgsi_src_register *tmp_src; - int n; - - tmp_dst->File = TGSI_FILE_TEMPORARY; - tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; - tmp_dst->Indirect = 0; - tmp_dst->Dimension = 0; - - /* assign next temporary: */ - n = ctx->num_internal_temps++; - compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); - tmp_src = &ctx->internal_temps[n]; - - tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; - - src_from_dst(tmp_src, tmp_dst); - - return tmp_src; -} - -/* Get internal half-precision temp src/dst to use for a sequence of - * instructions generated by a single TGSI op. 
- */ -static struct tgsi_src_register * -get_internal_temp_hr(struct ir3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst) -{ - struct tgsi_src_register *tmp_src; - int n; - - if (ctx->so->key.half_precision) - return get_internal_temp(ctx, tmp_dst); - - tmp_dst->File = TGSI_FILE_TEMPORARY; - tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; - tmp_dst->Indirect = 0; - tmp_dst->Dimension = 0; - - /* assign next temporary: */ - n = ctx->num_internal_temps++; - compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); - tmp_src = &ctx->internal_temps[n]; - - /* just use hr0 because no one else should be using half- - * precision regs: - */ - tmp_dst->Index = 0; - - src_from_dst(tmp_src, tmp_dst); - - return tmp_src; -} - -static inline bool -is_const(struct tgsi_src_register *src) -{ - return (src->File == TGSI_FILE_CONSTANT) || - (src->File == TGSI_FILE_IMMEDIATE); -} - -static inline bool -is_relative(struct tgsi_src_register *src) -{ - return src->Indirect; -} - -static inline bool -is_rel_or_const(struct tgsi_src_register *src) -{ - return is_relative(src) || is_const(src); -} - -static type_t -get_ftype(struct ir3_compile_context *ctx) -{ - return ctx->so->key.half_precision ? TYPE_F16 : TYPE_F32; -} - -static type_t -get_utype(struct ir3_compile_context *ctx) -{ - return ctx->so->key.half_precision ? 
TYPE_U16 : TYPE_U32; -} - -static unsigned -src_swiz(struct tgsi_src_register *src, int chan) -{ - switch (chan) { - case 0: return src->SwizzleX; - case 1: return src->SwizzleY; - case 2: return src->SwizzleZ; - case 3: return src->SwizzleW; - } - assert(0); - return 0; -} - -/* for instructions that cannot take a const register as src, if needed - * generate a move to temporary gpr: - */ -static struct tgsi_src_register * -get_unconst(struct ir3_compile_context *ctx, struct tgsi_src_register *src) -{ - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - - compile_assert(ctx, is_rel_or_const(src)); - - tmp_src = get_internal_temp(ctx, &tmp_dst); - - create_mov(ctx, &tmp_dst, src); - - return tmp_src; -} - -static void -get_immediate(struct ir3_compile_context *ctx, - struct tgsi_src_register *reg, uint32_t val) -{ - unsigned neg, swiz, idx, i; - /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ - static const unsigned swiz2tgsi[] = { - TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, - }; - - for (i = 0; i < ctx->immediate_idx; i++) { - swiz = i % 4; - idx = i / 4; - - if (ctx->so->immediates[idx].val[swiz] == val) { - neg = 0; - break; - } - - if (ctx->so->immediates[idx].val[swiz] == -val) { - neg = 1; - break; - } - } - - if (i == ctx->immediate_idx) { - /* need to generate a new immediate: */ - swiz = i % 4; - idx = i / 4; - neg = 0; - ctx->so->immediates[idx].val[swiz] = val; - ctx->so->immediates_count = idx + 1; - ctx->immediate_idx++; - } - - reg->File = TGSI_FILE_IMMEDIATE; - reg->Indirect = 0; - reg->Dimension = 0; - reg->Index = idx; - reg->Absolute = 0; - reg->Negate = neg; - reg->SwizzleX = swiz2tgsi[swiz]; - reg->SwizzleY = swiz2tgsi[swiz]; - reg->SwizzleZ = swiz2tgsi[swiz]; - reg->SwizzleW = swiz2tgsi[swiz]; -} - -static void -create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst, - struct tgsi_src_register *src) -{ - type_t type_mov = get_ftype(ctx); - unsigned i; - - 
for (i = 0; i < 4; i++) { - /* move to destination: */ - if (dst->WriteMask & (1 << i)) { - struct ir3_instruction *instr; - - if (src->Absolute || src->Negate) { - /* can't have abs or neg on a mov instr, so use - * absneg.f instead to handle these cases: - */ - instr = instr_create(ctx, 2, OPC_ABSNEG_F); - } else { - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - } - - add_dst_reg(ctx, instr, dst, i); - add_src_reg(ctx, instr, src, src_swiz(src, i)); - } else { - add_nop(ctx, 1); - } - } -} - -static void -create_clamp(struct ir3_compile_context *ctx, - struct tgsi_dst_register *dst, struct tgsi_src_register *val, - struct tgsi_src_register *minval, struct tgsi_src_register *maxval) -{ - struct ir3_instruction *instr; - - instr = instr_create(ctx, 2, OPC_MAX_F); - vectorize(ctx, instr, dst, 2, val, 0, minval, 0); - - instr = instr_create(ctx, 2, OPC_MIN_F); - vectorize(ctx, instr, dst, 2, val, 0, maxval, 0); -} - -static void -create_clamp_imm(struct ir3_compile_context *ctx, - struct tgsi_dst_register *dst, - uint32_t minval, uint32_t maxval) -{ - struct tgsi_src_register minconst, maxconst; - struct tgsi_src_register src; - - src_from_dst(&src, dst); - - get_immediate(ctx, &minconst, minval); - get_immediate(ctx, &maxconst, maxval); - - create_clamp(ctx, dst, &src, &minconst, &maxconst); -} - -static struct tgsi_dst_register * -get_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - unsigned i; - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - struct tgsi_src_register *src = &inst->Src[i].Register; - if ((src->File == dst->File) && (src->Index == dst->Index)) { - if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) && - (src->SwizzleX == TGSI_SWIZZLE_X) && - (src->SwizzleY == TGSI_SWIZZLE_Y) && - (src->SwizzleZ == TGSI_SWIZZLE_Z) && - (src->SwizzleW == TGSI_SWIZZLE_W)) - continue; - ctx->tmp_src = get_internal_temp(ctx, 
&ctx->tmp_dst); - ctx->tmp_dst.WriteMask = dst->WriteMask; - dst = &ctx->tmp_dst; - break; - } - } - return dst; -} - -static void -put_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst, - struct tgsi_dst_register *dst) -{ - /* if necessary, add mov back into original dst: */ - if (dst != &inst->Dst[0].Register) { - create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src); - } -} - -/* helper to generate the necessary repeat and/or additional instructions - * to turn a scalar instruction into a vector operation: - */ -static void -vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr, - struct tgsi_dst_register *dst, int nsrcs, ...) -{ - va_list ap; - int i, j, n = 0; - bool indirect = dst->Indirect; - - add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X); - - va_start(ap, nsrcs); - for (j = 0; j < nsrcs; j++) { - struct tgsi_src_register *src = - va_arg(ap, struct tgsi_src_register *); - unsigned flags = va_arg(ap, unsigned); - struct ir3_register *reg; - if (flags & IR3_REG_IMMED) { - reg = ir3_reg_create(instr, 0, IR3_REG_IMMED); - /* this is an ugly cast.. should have put flags first! 
*/ - reg->iim_val = *(int *)&src; - } else { - reg = add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X); - indirect |= src->Indirect; - } - reg->flags |= flags & ~IR3_REG_NEGATE; - if (flags & IR3_REG_NEGATE) - reg->flags ^= IR3_REG_NEGATE; - } - va_end(ap); - - for (i = 0; i < 4; i++) { - if (dst->WriteMask & (1 << i)) { - struct ir3_instruction *cur; - - if (n++ == 0) { - cur = instr; - } else { - cur = ir3_instr_clone(instr); - cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP); - } - - /* fix-up dst register component: */ - cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i); - - /* fix-up src register component: */ - va_start(ap, nsrcs); - for (j = 0; j < nsrcs; j++) { - struct tgsi_src_register *src = - va_arg(ap, struct tgsi_src_register *); - unsigned flags = va_arg(ap, unsigned); - if (!(flags & IR3_REG_IMMED)) { - cur->regs[j+1]->num = - regid(cur->regs[j+1]->num >> 2, - src_swiz(src, i)); - cur->flags |= src_flags(ctx, cur->regs[j+1]); - } - } - va_end(ap); - - if (indirect) - ctx->last_rel = cur; - } - } - - /* pad w/ nop's.. at least until we are clever enough to - * figure out if we really need to.. - */ - add_nop(ctx, 4 - n); -} - -/* - * Handlers for TGSI instructions which do not have a 1:1 mapping to - * native instructions: - */ - -static void -trans_clamp(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct tgsi_src_register *src2 = &inst->Src[2].Register; - - create_clamp(ctx, dst, src0, src1, src2); - - put_dst(ctx, inst, dst); -} - -/* ARL(x) = x, but mova from hrN.x to a0.. 
*/ -static void -trans_arl(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *src = &inst->Src[0].Register; - unsigned chan = src->SwizzleX; - compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS); - - handle_last_rel(ctx); - - tmp_src = get_internal_temp_hr(ctx, &tmp_dst); - - /* cov.{f32,f16}s16 Rtmp, Rsrc */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_ftype(ctx); - instr->cat1.dst_type = TYPE_S16; - add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, src, chan); - - add_nop(ctx, 3); - - /* shl.b Rtmp, Rtmp, 2 */ - instr = instr_create(ctx, 2, OPC_SHL_B); - add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; - - add_nop(ctx, 3); - - /* mova a0, Rtmp */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = TYPE_S16; - instr->cat1.dst_type = TYPE_S16; - add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF; - add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; - - /* need to ensure 5 instr slots before a0 is used: */ - add_nop(ctx, 6); -} - -/* texture fetch/sample instructions: */ -static void -trans_samp(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_register *r; - struct ir3_instruction *instr; - struct tgsi_src_register *coord = &inst->Src[0].Register; - struct tgsi_src_register *samp = &inst->Src[1].Register; - unsigned tex = inst->Texture.Texture; - int8_t *order; - unsigned i, flags = 0, src_wrmask; - bool needs_mov = false; - - switch (t->arg) { - case TGSI_OPCODE_TEX: - if (tex == TGSI_TEXTURE_2D) { - order = (int8_t[4]){ 0, 1, -1, 
-1 }; - src_wrmask = TGSI_WRITEMASK_XY; - } else { - order = (int8_t[4]){ 0, 1, 2, -1 }; - src_wrmask = TGSI_WRITEMASK_XYZ; - } - break; - case TGSI_OPCODE_TXP: - if (tex == TGSI_TEXTURE_2D) { - order = (int8_t[4]){ 0, 1, 3, -1 }; - src_wrmask = TGSI_WRITEMASK_XYZ; - } else { - order = (int8_t[4]){ 0, 1, 2, 3 }; - src_wrmask = TGSI_WRITEMASK_XYZW; - } - flags |= IR3_INSTR_P; - break; - default: - compile_assert(ctx, 0); - break; - } - - if ((tex == TGSI_TEXTURE_3D) || (tex == TGSI_TEXTURE_CUBE)) { - add_nop(ctx, 3); - flags |= IR3_INSTR_3D; - } - - /* cat5 instruction cannot seem to handle const or relative: */ - if (is_rel_or_const(coord)) - needs_mov = true; - - /* The texture sample instructions need to coord in successive - * registers/components (ie. src.xy but not src.yx). And TXP - * needs the .w component in .z for 2D.. so in some cases we - * might need to emit some mov instructions to shuffle things - * around: - */ - for (i = 1; (i < 4) && (order[i] >= 0) && !needs_mov; i++) - if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) - needs_mov = true; - - if (needs_mov) { - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - unsigned j; - - type_t type_mov = get_ftype(ctx); - - /* need to move things around: */ - tmp_src = get_internal_temp(ctx, &tmp_dst); - - for (j = 0; (j < 4) && (order[j] >= 0); j++) { - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = type_mov; - instr->cat1.dst_type = type_mov; - add_dst_reg(ctx, instr, &tmp_dst, j); - add_src_reg(ctx, instr, coord, - src_swiz(coord, order[j])); - } - - coord = tmp_src; - - add_nop(ctx, 4 - j); - } - - instr = instr_create(ctx, 5, t->opc); - instr->cat5.type = get_ftype(ctx); - instr->cat5.samp = samp->Index; - instr->cat5.tex = samp->Index; - instr->flags |= flags; - - r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0); - r->wrmask = inst->Dst[0].Register.WriteMask; - - add_src_reg(ctx, instr, coord, coord->SwizzleX)->wrmask = src_wrmask; - - /* after 
add_src_reg() so we don't set (sy) on sam instr itself! */ - regmask_set(&ctx->needs_sy, r); -} - -/* - * SEQ(a,b) = (a == b) ? 1.0 : 0.0 - * cmps.f.eq tmp0, b, a - * cov.u16f16 dst, tmp0 - * - * SNE(a,b) = (a != b) ? 1.0 : 0.0 - * cmps.f.eq tmp0, b, a - * add.s tmp0, tmp0, -1 - * sel.f16 dst, {0.0}, tmp0, {1.0} - * - * SGE(a,b) = (a >= b) ? 1.0 : 0.0 - * cmps.f.ge tmp0, a, b - * cov.u16f16 dst, tmp0 - * - * SLE(a,b) = (a <= b) ? 1.0 : 0.0 - * cmps.f.ge tmp0, b, a - * cov.u16f16 dst, tmp0 - * - * SGT(a,b) = (a > b) ? 1.0 : 0.0 - * cmps.f.ge tmp0, b, a - * add.s tmp0, tmp0, -1 - * sel.f16 dst, {0.0}, tmp0, {1.0} - * - * SLT(a,b) = (a < b) ? 1.0 : 0.0 - * cmps.f.ge tmp0, a, b - * add.s tmp0, tmp0, -1 - * sel.f16 dst, {0.0}, tmp0, {1.0} - * - * CMP(a,b,c) = (a < 0.0) ? b : c - * cmps.f.ge tmp0, a, {0.0} - * add.s tmp0, tmp0, -1 - * sel.f16 dst, c, tmp0, b - */ -static void -trans_cmp(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; - struct tgsi_src_register constval0, constval1; - /* final instruction for CMP() uses orig src1 and src2: */ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *a0, *a1; - unsigned condition; - - tmp_src = get_internal_temp(ctx, &tmp_dst); - - switch (t->tgsi_opc) { - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SNE: - a0 = &inst->Src[1].Register; /* b */ - a1 = &inst->Src[0].Register; /* a */ - condition = IR3_COND_EQ; - break; - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SLT: - a0 = &inst->Src[0].Register; /* a */ - a1 = &inst->Src[1].Register; /* b */ - condition = IR3_COND_GE; - break; - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SGT: - a0 = &inst->Src[1].Register; /* b */ - a1 = &inst->Src[0].Register; /* a */ - condition = IR3_COND_GE; - break; - case TGSI_OPCODE_CMP: - get_immediate(ctx, &constval0, fui(0.0)); - a0 = &inst->Src[0].Register; /* a 
*/ - a1 = &constval0; /* {0.0} */ - condition = IR3_COND_GE; - break; - default: - compile_assert(ctx, 0); - return; - } - - if (is_const(a0) && is_const(a1)) - a0 = get_unconst(ctx, a0); - - /* cmps.f.ge tmp, a0, a1 */ - instr = instr_create(ctx, 2, OPC_CMPS_F); - instr->cat2.condition = condition; - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); - - switch (t->tgsi_opc) { - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SLE: - /* cov.u16f16 dst, tmp0 */ - instr = instr_create(ctx, 1, 0); - instr->cat1.src_type = get_utype(ctx); - instr->cat1.dst_type = get_ftype(ctx); - vectorize(ctx, instr, dst, 1, tmp_src, 0); - break; - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_CMP: - /* add.s tmp, tmp, -1 */ - instr = instr_create(ctx, 2, OPC_ADD_S); - vectorize(ctx, instr, &tmp_dst, 2, tmp_src, 0, -1, IR3_REG_IMMED); - - if (t->tgsi_opc == TGSI_OPCODE_CMP) { - /* sel.{f32,f16} dst, src2, tmp, src1 */ - instr = instr_create(ctx, 3, - ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32); - vectorize(ctx, instr, dst, 3, - &inst->Src[2].Register, 0, - tmp_src, 0, - &inst->Src[1].Register, 0); - } else { - get_immediate(ctx, &constval0, fui(0.0)); - get_immediate(ctx, &constval1, fui(1.0)); - /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */ - instr = instr_create(ctx, 3, - ctx->so->key.half_precision ? 
OPC_SEL_F16 : OPC_SEL_F32); - vectorize(ctx, instr, dst, 3, - &constval0, 0, tmp_src, 0, &constval1, 0); - } - - break; - } - - put_dst(ctx, inst, dst); -} - -/* - * Conditional / Flow control - */ - -static unsigned -find_instruction(struct ir3_compile_context *ctx, struct ir3_instruction *instr) -{ - unsigned i; - for (i = 0; i < ctx->ir->instrs_count; i++) - if (ctx->ir->instrs[i] == instr) - return i; - return ~0; -} - -static void -push_branch(struct ir3_compile_context *ctx, struct ir3_instruction *instr) -{ - ctx->branch[ctx->branch_count++] = instr; -} - -static void -pop_branch(struct ir3_compile_context *ctx) -{ - struct ir3_instruction *instr; - - /* if we were clever enough, we'd patch this up after the fact, - * and set (jp) flag on whatever the next instruction was, rather - * than inserting an extra nop.. - */ - instr = instr_create(ctx, 0, OPC_NOP); - instr->flags |= IR3_INSTR_JP; - - /* pop the branch instruction from the stack and fix up branch target: */ - instr = ctx->branch[--ctx->branch_count]; - instr->cat0.immed = ctx->ir->instrs_count - find_instruction(ctx, instr) - 1; -} - -/* We probably don't really want to translate if/else/endif into branches.. - * the blob driver evaluates both legs of the if and then uses the sel - * instruction to pick which sides of the branch to "keep".. but figuring - * that out will take somewhat more compiler smarts. So hopefully branches - * don't kill performance too badly. 
- */ -static void -trans_if(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - struct tgsi_src_register *src = &inst->Src[0].Register; - struct tgsi_src_register constval; - - get_immediate(ctx, &constval, fui(0.0)); - - if (is_const(src)) - src = get_unconst(ctx, src); - - instr = instr_create(ctx, 2, OPC_CMPS_F); - ir3_reg_create(instr, regid(REG_P0, 0), 0); - add_src_reg(ctx, instr, src, src->SwizzleX); - add_src_reg(ctx, instr, &constval, constval.SwizzleX); - instr->cat2.condition = IR3_COND_EQ; - - instr = instr_create(ctx, 0, OPC_BR); - push_branch(ctx, instr); -} - -static void -trans_else(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct ir3_instruction *instr; - - /* for first half of if/else/endif, generate a jump past the else: */ - instr = instr_create(ctx, 0, OPC_JUMP); - - pop_branch(ctx); - push_branch(ctx, instr); -} - -static void -trans_endif(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - pop_branch(ctx); -} - -/* - * Handlers for TGSI instructions which do have 1:1 mapping to native - * instructions: - */ - -static void -instr_cat0(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - instr_create(ctx, 0, t->opc); -} - -static void -instr_cat1(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src = &inst->Src[0].Register; - - /* mov instructions can't handle a negate on src: */ - if (src->Negate) { - struct tgsi_src_register constval; - struct ir3_instruction *instr; - - /* since right now, we are using uniformly either TYPE_F16 or - * TYPE_F32, and we don't utilize the conversion possibilities - * of mov instructions, we can 
get away with substituting an - * add.f which can handle negate. Might need to revisit this - * in the future if we start supporting widening/narrowing or - * conversion to/from integer.. - */ - instr = instr_create(ctx, 2, OPC_ADD_F); - get_immediate(ctx, &constval, fui(0.0)); - vectorize(ctx, instr, dst, 2, src, 0, &constval, 0); - } else { - create_mov(ctx, dst, src); - /* create_mov() generates vector sequence, so no vectorize() */ - } - put_dst(ctx, inst, dst); -} - -static void -instr_cat2(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct ir3_instruction *instr; - unsigned src0_flags = 0, src1_flags = 0; - - switch (t->tgsi_opc) { - case TGSI_OPCODE_ABS: - src0_flags = IR3_REG_ABS; - break; - case TGSI_OPCODE_SUB: - src1_flags = IR3_REG_NEGATE; - break; - } - - switch (t->opc) { - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - case OPC_CLZ_B: - case OPC_CLZ_S: - case OPC_SIGN_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_NOT_B: - case OPC_BFREV_B: - case OPC_SETRM: - case OPC_CBITS_B: - /* these only have one src reg */ - instr = instr_create(ctx, 2, t->opc); - vectorize(ctx, instr, dst, 1, src0, src0_flags); - break; - default: - if (is_const(src0) && is_const(src1)) - src0 = get_unconst(ctx, src0); - - instr = instr_create(ctx, 2, t->opc); - vectorize(ctx, instr, dst, 2, src0, src0_flags, - src1, src1_flags); - break; - } - - put_dst(ctx, inst, dst); -} - -static void -instr_cat3(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; - 
struct ir3_instruction *instr; - - /* in particular, can't handle const for src1 for cat3.. - * for mad, we can swap first two src's if needed: - */ - if (is_rel_or_const(src1)) { - if (is_mad(t->opc) && !is_rel_or_const(src0)) { - struct tgsi_src_register *tmp; - tmp = src0; - src0 = src1; - src1 = tmp; - } else { - src1 = get_unconst(ctx, src1); - } - } - - instr = instr_create(ctx, 3, - ctx->so->key.half_precision ? t->hopc : t->opc); - vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, - &inst->Src[2].Register, 0); - put_dst(ctx, inst, dst); -} - -static void -instr_cat4(const struct instr_translater *t, - struct ir3_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src = &inst->Src[0].Register; - struct ir3_instruction *instr; - unsigned i, n; - - /* seems like blob compiler avoids const as src.. */ - if (is_const(src)) - src = get_unconst(ctx, src); - - /* worst case: */ - add_nop(ctx, 6); - - /* we need to replicate into each component: */ - for (i = 0, n = 0; i < 4; i++) { - if (dst->WriteMask & (1 << i)) { - if (n++) - add_nop(ctx, 1); - instr = instr_create(ctx, 4, t->opc); - add_dst_reg(ctx, instr, dst, i); - add_src_reg(ctx, instr, src, src->SwizzleX); - } - } - - regmask_set(&ctx->needs_ss, instr->regs[0]); - put_dst(ctx, inst, dst); -} - -static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { -#define INSTR(n, f, ...) 
\ - [TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ } - - INSTR(MOV, instr_cat1), - INSTR(RCP, instr_cat4, .opc = OPC_RCP), - INSTR(RSQ, instr_cat4, .opc = OPC_RSQ), - INSTR(SQRT, instr_cat4, .opc = OPC_SQRT), - INSTR(MUL, instr_cat2, .opc = OPC_MUL_F), - INSTR(ADD, instr_cat2, .opc = OPC_ADD_F), - INSTR(SUB, instr_cat2, .opc = OPC_ADD_F), - INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), - INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), - INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), - INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F), - INSTR(CLAMP, trans_clamp), - INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), - INSTR(ROUND, instr_cat2, .opc = OPC_RNDNE_F), - INSTR(SSG, instr_cat2, .opc = OPC_SIGN_F), - INSTR(ARL, trans_arl), - INSTR(EX2, instr_cat4, .opc = OPC_EXP2), - INSTR(LG2, instr_cat4, .opc = OPC_LOG2), - INSTR(ABS, instr_cat2, .opc = OPC_ABSNEG_F), - INSTR(COS, instr_cat4, .opc = OPC_COS), - INSTR(SIN, instr_cat4, .opc = OPC_SIN), - INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX), - INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP), - INSTR(SGT, trans_cmp), - INSTR(SLT, trans_cmp), - INSTR(SGE, trans_cmp), - INSTR(SLE, trans_cmp), - INSTR(SNE, trans_cmp), - INSTR(SEQ, trans_cmp), - INSTR(CMP, trans_cmp), - INSTR(IF, trans_if), - INSTR(ELSE, trans_else), - INSTR(ENDIF, trans_endif), - INSTR(END, instr_cat0, .opc = OPC_END), - INSTR(KILL, instr_cat0, .opc = OPC_KILL), -}; - -static ir3_semantic -decl_semantic(const struct tgsi_declaration_semantic *sem) -{ - return ir3_semantic_name(sem->Name, sem->Index); -} - -static int -decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned base = ctx->base_reg[TGSI_FILE_INPUT]; - unsigned i, flags = 0; - int nop = 0; - - /* I don't think we should get frag shader input without - * semantic info? Otherwise how do inputs get linked to - * vert outputs? 
- */ - compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) || - decl->Declaration.Semantic); - - if (ctx->so->key.half_precision) - flags |= IR3_REG_HALF; - - for (i = decl->Range.First; i <= decl->Range.Last; i++) { - unsigned n = so->inputs_count++; - unsigned r = regid(i + base, 0); - unsigned ncomp; - - /* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */ - ncomp = 4; - - DBG("decl in -> r%d", i + base); // XXX - - compile_assert(ctx, n < ARRAY_SIZE(so->inputs)); - - so->inputs[n].semantic = decl_semantic(&decl->Semantic); - so->inputs[n].compmask = (1 << ncomp) - 1; - so->inputs[n].ncomp = ncomp; - so->inputs[n].regid = r; - so->inputs[n].inloc = ctx->next_inloc; - so->inputs[n].bary = true; /* all that is supported */ - ctx->next_inloc += ncomp; - - so->total_in += ncomp; - - /* for frag shaders, we need to generate the corresponding bary instr: */ - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - unsigned j; - - for (j = 0; j < ncomp; j++) { - struct ir3_instruction *instr; - struct ir3_register *dst; - - instr = instr_create(ctx, 2, OPC_BARY_F); - - /* dst register: */ - dst = ir3_reg_create(instr, r + j, flags); - ctx->last_input = dst; - - /* input position: */ - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = - so->inputs[n].inloc + j - 8; - - /* input base (always r0.xy): */ - ir3_reg_create(instr, regid(0,0), 0)->wrmask = 0x3; - } - - nop = 6; - } - } - - return nop; -} - -static void -decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT]; - unsigned comp = 0; - unsigned name = decl->Semantic.Name; - unsigned i; - - compile_assert(ctx, decl->Declaration.Semantic); // TODO is this ever not true? 
- - DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX - - if (ctx->type == TGSI_PROCESSOR_VERTEX) { - switch (name) { - case TGSI_SEMANTIC_POSITION: - so->writes_pos = true; - break; - case TGSI_SEMANTIC_PSIZE: - so->writes_psize = true; - break; - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_BCOLOR: - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_TEXCOORD: - break; - default: - compile_error(ctx, "unknown VS semantic name: %s\n", - tgsi_semantic_names[name]); - } - } else { - switch (name) { - case TGSI_SEMANTIC_POSITION: - comp = 2; /* tgsi will write to .z component */ - so->writes_pos = true; - break; - case TGSI_SEMANTIC_COLOR: - break; - default: - compile_error(ctx, "unknown FS semantic name: %s\n", - tgsi_semantic_names[name]); - } - } - - for (i = decl->Range.First; i <= decl->Range.Last; i++) { - unsigned n = so->outputs_count++; - compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); - so->outputs[n].semantic = decl_semantic(&decl->Semantic); - so->outputs[n].regid = regid(i + base, comp); - } -} - -static void -decl_samp(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - ctx->so->has_samp = true; -} - -static void -compile_instructions(struct ir3_compile_context *ctx) -{ - struct ir3 *ir = ctx->ir; - int nop = 0; - - while (!tgsi_parse_end_of_tokens(&ctx->parser)) { - tgsi_parse_token(&ctx->parser); - - switch (ctx->parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: { - struct tgsi_full_declaration *decl = - &ctx->parser.FullToken.FullDeclaration; - if (decl->Declaration.File == TGSI_FILE_OUTPUT) { - decl_out(ctx, decl); - } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - nop = decl_in(ctx, decl); - } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { - decl_samp(ctx, decl); - } - break; - } - case TGSI_TOKEN_TYPE_IMMEDIATE: { - /* TODO: if we know the immediate is small enough, and only - * used with instructions that can embed an immediate, we - * can skip 
this: - */ - struct tgsi_full_immediate *imm = - &ctx->parser.FullToken.FullImmediate; - unsigned n = ctx->so->immediates_count++; - memcpy(ctx->so->immediates[n].val, imm->u, 16); - break; - } - case TGSI_TOKEN_TYPE_INSTRUCTION: { - struct tgsi_full_instruction *inst = - &ctx->parser.FullToken.FullInstruction; - unsigned opc = inst->Instruction.Opcode; - const struct instr_translater *t = &translaters[opc]; - - add_nop(ctx, nop); - nop = 0; - - if (t->fxn) { - t->fxn(t, ctx, inst); - ctx->num_internal_temps = 0; - } else { - compile_error(ctx, "unknown TGSI opc: %s\n", - tgsi_get_opcode_name(opc)); - } - - switch (inst->Instruction.Saturate) { - case TGSI_SAT_ZERO_ONE: - create_clamp_imm(ctx, &inst->Dst[0].Register, - fui(0.0), fui(1.0)); - break; - case TGSI_SAT_MINUS_PLUS_ONE: - create_clamp_imm(ctx, &inst->Dst[0].Register, - fui(-1.0), fui(1.0)); - break; - } - - break; - } - default: - break; - } - } - - if (ir->instrs_count > 0) - ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY; - - if (ctx->last_input) - ctx->last_input->flags |= IR3_REG_EI; - - handle_last_rel(ctx); -} - -int -ir3_compile_shader_old(struct ir3_shader_variant *so, - const struct tgsi_token *tokens, struct ir3_shader_key key) -{ - struct ir3_compile_context ctx; - - assert(!so->ir); - - so->ir = ir3_create(); - - assert(so->ir); - - if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) - return -1; - - compile_instructions(&ctx); - - compile_free(&ctx); - - return 0; -} diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3.h 2015-03-18 08:43:34.000000000 +0000 @@ -38,8 +38,6 @@ struct ir3_instruction; struct ir3_block; -struct ir3 * fd_asm_parse(const char *src); - struct ir3_info { uint16_t sizedwords; 
uint16_t instrs_count; /* expanded to account for rpt's */ @@ -313,8 +311,29 @@ struct ir3 { unsigned instrs_count, instrs_sz; struct ir3_instruction **instrs; + + /* Track bary.f (and ldlv) instructions.. this is needed in + * scheduling to ensure that all varying fetches happen before + * any potential kill instructions. The hw gets grumpy if all + * threads in a group are killed before the last bary.f gets + * a chance to signal end of input (ei). + */ unsigned baryfs_count, baryfs_sz; struct ir3_instruction **baryfs; + + /* Track all indirect instructions (read and write). To avoid + * deadlock scenario where an address register gets scheduled, + * but other dependent src instructions cannot be scheduled due + * to dependency on a *different* address register value, the + * scheduler needs to ensure that all dependencies other than + * the instruction other than the address register are scheduled + * before the one that writes the address register. Having a + * convenient list of instructions that reference some address + * register simplifies this. 
+ */ + unsigned indirects_count, indirects_sz; + struct ir3_instruction **indirects; + struct ir3_block *block; unsigned heap_idx; struct ir3_heap_chunk *chunk; @@ -503,6 +522,13 @@ return true; } +#define array_insert(arr, val) do { \ + if (arr ## _count == arr ## _sz) { \ + arr ## _sz = MAX2(2 * arr ## _sz, 16); \ + arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \ + } \ + arr[arr ##_count++] = val; \ + } while (0) /* iterator for an instructions's sources (reg), also returns src #: */ #define foreach_src_n(__srcreg, __n, __instr) \ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_sched.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_sched.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_sched.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_sched.c 2015-03-18 08:43:34.000000000 +0000 @@ -199,7 +199,8 @@ } /* A negative return value signals that an instruction has been newly - * scheduled, return back up to the top of the stack (to block_sched()) + * SCHEDULED (or DELAYED due to address or predicate register already + * in use), return back up to the top of the stack (to block_sched()) */ static int trysched(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) @@ -252,14 +253,45 @@ unsigned i; for (i = 0; i < ir->baryfs_count; i++) { - if (ir->baryfs[i]->depth == DEPTH_UNUSED) + struct ir3_instruction *baryf = ir->baryfs[i]; + if (baryf->depth == DEPTH_UNUSED) continue; - delay = trysched(ctx, ir->baryfs[i]); + delay = trysched(ctx, baryf); if (delay) return delay; } } + /* if instruction writes address register, we need to ensure + * that the instructions which use the address register value + * have all their other dependencies scheduled. 
+ * TODO we may possibly need to do the same thing with predicate + * register usage, but for now we get by without since the + * predicate usage patterns are more simple + */ + if (writes_addr(instr)) { + struct ir3 *ir = instr->block->shader; + unsigned i; + + for (i = 0; i < ir->indirects_count; i++) { + struct ir3_instruction *indirect = ir->indirects[i]; + if (indirect->depth == DEPTH_UNUSED) + continue; + if (indirect->address != instr) + continue; + /* NOTE: avoid recursively scheduling the dependency + * on ourself (ie. avoid infinite recursion): + */ + foreach_ssa_src(src, indirect) { + if (src == instr) + continue; + delay = trysched(ctx, src); + if (delay) + return delay; + } + } + } + /* if this is a write to address/predicate register, and that * register is currently in use, we need to defer until it is * free: diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_shader.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_shader.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/ir3/ir3_shader.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/ir3/ir3_shader.c 2015-03-18 08:43:34.000000000 +0000 @@ -177,21 +177,12 @@ tgsi_dump(tokens, 0); } - if (!(fd_mesa_debug & FD_DBG_NOOPT)) { - ret = ir3_compile_shader(v, tokens, key, true); - if (ret) { - reset_variant(v, "new compiler failed, trying without copy propagation!"); - ret = ir3_compile_shader(v, tokens, key, false); - if (ret) - reset_variant(v, "new compiler failed, trying fallback!"); - } - } else { - ret = -1; /* force fallback to old compiler */ + ret = ir3_compile_shader(v, tokens, key, true); + if (ret) { + reset_variant(v, "new compiler failed, trying without copy propagation!"); + ret = ir3_compile_shader(v, tokens, key, false); } - if (ret) - ret = ir3_compile_shader_old(v, tokens, key); - if (ret) { debug_error("compile failed!"); goto fail; diff -Nru 
mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/Makefile.sources mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/Makefile.sources --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/freedreno/Makefile.sources 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/freedreno/Makefile.sources 2015-03-18 08:43:34.000000000 +0000 @@ -122,7 +122,6 @@ ir3/ir3.c \ ir3/ir3_compiler.c \ ir3/ir3_compiler.h \ - ir3/ir3_compiler_old.c \ ir3/ir3_cp.c \ ir3/ir3_depth.c \ ir3/ir3_dump.c \ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/i915/i915_screen.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/i915/i915_screen.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/i915/i915_screen.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/i915/i915_screen.c 2015-03-18 08:43:34.000000000 +0000 @@ -158,6 +158,7 @@ case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; default: debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap); diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/nouveau/nv30/nv30_screen.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/nouveau/nv30/nv30_screen.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/nouveau/nv30/nv30_screen.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/nouveau/nv30/nv30_screen.c 2015-03-18 08:43:35.000000000 +0000 @@ -250,6 +250,7 @@ case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; default: debug_printf("unknown vertex shader param %d\n", param); @@ -289,6 +290,7 @@ case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case 
PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; default: debug_printf("unknown fragment shader param %d\n", param); diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/nouveau/nv50/nv50_screen.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/nouveau/nv50/nv50_screen.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/nouveau/nv50/nv50_screen.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/nouveau/nv50/nv50_screen.c 2015-03-18 08:43:35.000000000 +0000 @@ -289,6 +289,7 @@ case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 2015-03-18 08:43:35.000000000 +0000 @@ -295,6 +295,7 @@ case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: return 1; case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: return 16; /* would be 32 in linked (OpenGL-style) mode */ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/r300/r300_screen.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/r300/r300_screen.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/r300/r300_screen.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/r300/r300_screen.c 2015-03-18 08:43:35.000000000 +0000 @@ -287,6 +287,7 @@ case PIPE_SHADER_CAP_DOUBLES: case 
PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; @@ -341,6 +342,7 @@ case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/r600/evergreen_state.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/r600/evergreen_state.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/r600/evergreen_state.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/r600/evergreen_state.c 2015-03-18 08:43:35.000000000 +0000 @@ -1692,7 +1692,7 @@ evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples, rctx->ps_iter_samples); } else { cayman_emit_msaa_sample_locs(cs, rctx->framebuffer.nr_samples); - cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, rctx->ps_iter_samples); + cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, rctx->ps_iter_samples, 0); } } diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/r600/r600_pipe.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/r600/r600_pipe.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/r600/r600_pipe.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/r600/r600_pipe.c 2015-03-18 08:43:35.000000000 +0000 @@ -493,6 +493,7 @@ return 0; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; } return 0; diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/r600/r600_shader.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/r600/r600_shader.c --- 
mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/r600/r600_shader.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/r600/r600_shader.c 2015-03-18 08:43:35.000000000 +0000 @@ -283,7 +283,7 @@ unsigned type; unsigned file_offset[TGSI_FILE_COUNT]; unsigned temp_reg; - struct r600_shader_tgsi_instruction *inst_info; + const struct r600_shader_tgsi_instruction *inst_info; struct r600_bytecode *bc; struct r600_shader *shader; struct r600_shader_src src[4]; @@ -311,14 +311,12 @@ }; struct r600_shader_tgsi_instruction { - unsigned tgsi_opcode; - unsigned is_op3; unsigned op; int (*process)(struct r600_shader_ctx *ctx); }; static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind); -static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; +static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason); static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); @@ -2543,8 +2541,10 @@ static int tgsi_unsupported(struct r600_shader_ctx *ctx) { + const unsigned tgsi_opcode = + ctx->parse.FullToken.FullInstruction.Instruction.Opcode; R600_ERR("%s tgsi opcode unsupported\n", - tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); + tgsi_get_opcode_name(tgsi_opcode)); return -EINVAL; } @@ -2639,7 +2639,7 @@ r600_bytecode_src(&alu.src[1], &ctx->src[0], i); } /* handle some special cases */ - switch (ctx->inst_info->tgsi_opcode) { + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_SUB: r600_bytecode_src_toggle_neg(&alu.src[1]); break; @@ -2738,7 +2738,7 @@ r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); /* RSQ should take the absolute value of src */ - if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) { + if 
(inst->Instruction.Opcode == TGSI_OPCODE_RSQ) { r600_bytecode_src_set_abs(&alu.src[j]); } } @@ -3079,6 +3079,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) { + const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; int i, r; @@ -3090,7 +3091,7 @@ alu.src[0].sel = V_SQ_ALU_SRC_0; - if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILL) { + if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) { alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[1].neg = 1; } else { @@ -4945,7 +4946,7 @@ alu.dst.chan = i; alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; /* handle some special cases */ - switch (ctx->inst_info->tgsi_opcode) { + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_DP2: if (i > 1) { alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; @@ -7269,605 +7270,605 @@ return 0; } -static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_r600_arl}, - {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2}, - {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit}, +static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { + [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl}, + [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, + [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, /* XXX: * For state trackers other than OpenGL, we'll want to use * _RECIP_IEEE instead. 
*/ - {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, + [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_RSQ, 0, ALU_OP0_NOP, tgsi_rsq}, - {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp}, - {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log}, - {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2}, - {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2}, - {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst}, - {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2}, - {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2}, - {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap}, - {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2}, - {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, - {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, - {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, - {19, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, - {22, 0, ALU_OP0_NOP, tgsi_unsupported}, - {23, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2}, - {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2}, - {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow}, - {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd}, - {32, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2}, - {34, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig}, - {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, - {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, - {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ - 
{TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {44, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2}, - {46, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2}, - {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig}, - {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap}, - {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2}, - {51, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex}, - {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {59, 0, ALU_OP0_NOP, tgsi_unsupported}, - {60, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_r600_arl}, - {62, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg}, - {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp}, - {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs}, - {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, - {69, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, - {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, - {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if}, - {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif}, - {76, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else}, - {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif}, - {TGSI_OPCODE_DDX_FINE, 0, ALU_OP0_NOP, 
tgsi_unsupported}, - {TGSI_OPCODE_DDY_FINE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2}, - {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, - {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2}, - {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2}, - {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2_trans}, - {88, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2}, - {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2}, - {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod}, - {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2}, - {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, - {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, - {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, - {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit}, - {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit}, - {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, - {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, - {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, - {104, 0, ALU_OP0_NOP, tgsi_unsupported}, - {105, 0, ALU_OP0_NOP, tgsi_unsupported}, - {106, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2}, - {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2}, - {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap}, - {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap}, - {112, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, - {114, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_loop_breakc}, - {TGSI_OPCODE_KILL_IF, 0, 
ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ - {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */ - {118, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, - {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv}, - {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2}, - {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2}, - {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg}, - {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2}, - {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2_trans}, - {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap}, - {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, - {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, - {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2}, - {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv}, - {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad}, - {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2}, - {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2}, - {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod}, - {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_UINT, tgsi_op2_trans}, - {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2}, - {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2}, - {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2_trans}, - {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap}, - {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2_swap}, - {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_L, 0, 0, 
tgsi_unsupported}, - {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_r600_arl}, - {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp}, - {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, - {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, - {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, - {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, - {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, tgsi_op2_trans}, - {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, tgsi_op2_trans}, - {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_unsupported}, - {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_unsupported}, - {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_unsupported}, - {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_unsupported}, - {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_unsupported}, - 
{TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_unsupported}, - {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_unsupported}, - {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_unsupported}, - {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_unsupported}, - {TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, + [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, + [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, + [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, + [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, + [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, + [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, + [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, + [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, + [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, + [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, + [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, + [22] = { ALU_OP0_NOP, tgsi_unsupported}, + [23] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, + [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, + [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, + [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, + [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, + [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, + [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, + [32] = { ALU_OP0_NOP, tgsi_unsupported}, + 
[TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, + [34] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, + [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, + [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, + [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ + [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, + [44] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, + [46] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, + [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, + [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, + [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, + [51] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, + [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, + [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, + [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, + [59] = { ALU_OP0_NOP, tgsi_unsupported}, + [60] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_r600_arl}, + [62] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, + [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, + [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, + [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, + [69] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DP2] = 
{ ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, + [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, + [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, + [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, + [76] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, + [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, + [TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, + [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, + [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, + [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, + [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2_trans}, + [88] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, + [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, + [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, + [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, + [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, + [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, + [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, + [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, + [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, + [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, + [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, + [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, + [104] = { ALU_OP0_NOP, tgsi_unsupported}, + [105] = { ALU_OP0_NOP, tgsi_unsupported}, + [106] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, + 
[TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, + [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, + [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, + [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, + [112] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, + [114] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc}, + [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ + [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ + [118] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, + [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, + [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, + [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, + [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, + [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, + [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2_trans}, + [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, + [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, + [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, + [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, + [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, + [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, + [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, + [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, + [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, + [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, + [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, + [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, + [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2_trans}, + [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, + [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2_swap}, + [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DEFAULT] = 
{ ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_r600_arl}, + [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, + [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, + [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, + [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, + [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, + [TGSI_OPCODE_TXL2] = { 
FETCH_OP_SAMPLE_L, tgsi_tex}, + [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, + [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, + [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_unsupported}, + [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_unsupported}, + [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_unsupported}, + [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_unsupported}, + [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_unsupported}, + [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_unsupported}, + [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_unsupported}, + [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_unsupported}, + [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_unsupported}, + [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, }; -static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_eg_arl}, - {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2}, - {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit}, - {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_RSQ, 0, ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, - {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp}, - {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log}, - {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2}, - {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2}, - {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst}, - {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2}, - {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2}, - {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap}, - {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2}, - {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, - {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, 
tgsi_op2}, - {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, - {19, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, - {22, 0, ALU_OP0_NOP, tgsi_unsupported}, - {23, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2}, - {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2}, - {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow}, - {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd}, - {32, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2}, - {34, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig}, - {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, - {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, - {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ - {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {44, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2}, - {46, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2}, - {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig}, - {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap}, - {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2}, - {51, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex}, - {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported}, - 
{TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {59, 0, ALU_OP0_NOP, tgsi_unsupported}, - {60, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl}, - {62, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg}, - {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp}, - {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs}, - {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, - {69, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, - {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, - {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if}, - {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif}, - {76, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else}, - {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif}, - {TGSI_OPCODE_DDX_FINE, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, - {TGSI_OPCODE_DDY_FINE, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, - {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2}, - {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, - {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2}, - {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2}, - {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2}, - {88, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2}, - {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2}, - {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod}, - {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2}, - {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, - 
{TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, - {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, - {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit}, - {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit}, - {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, - {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, - {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, - {104, 0, ALU_OP0_NOP, tgsi_unsupported}, - {105, 0, ALU_OP0_NOP, tgsi_unsupported}, - {106, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2}, - {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2}, - {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap}, - {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap}, - {112, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, - {114, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ - {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */ - {118, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_f2i}, - {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv}, - {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2}, - {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2}, - {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg}, - {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2}, - {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2}, - {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap}, - {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_f2i}, - {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, - {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2}, - {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv}, - 
{TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad}, - {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2}, - {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2}, - {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod}, - {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_UINT, tgsi_op2_trans}, - {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2}, - {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2}, - {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2}, - {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap}, - {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2}, - {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_eg_arl}, - {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp}, - {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, - {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, - {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMXCHG, 0, 
ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, - {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, - {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, tgsi_op2_trans}, - {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, tgsi_op2_trans}, - {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_tex}, - {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_tex}, - {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_op3}, - {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_op3}, - {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_bfi}, - {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_op2}, - {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_op2}, - {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_op2}, - {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_msb}, - {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_msb}, - {TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_interp_egcm}, - {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_interp_egcm}, - {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_interp_egcm}, - {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported}, +static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { + [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, + [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, + [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, + [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, + [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, + [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, + [TGSI_OPCODE_LOG] 
= { ALU_OP0_NOP, tgsi_log}, + [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, + [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, + [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, + [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, + [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, + [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, + [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, + [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, + [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, + [22] = { ALU_OP0_NOP, tgsi_unsupported}, + [23] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, + [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, + [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, + [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, + [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, + [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, + [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, + [32] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, + [34] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, + [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, + [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, + [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ + [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, + 
[44] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, + [46] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, + [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, + [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, + [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, + [51] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, + [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, + [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, + [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, + [59] = { ALU_OP0_NOP, tgsi_unsupported}, + [60] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, + [62] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, + [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, + [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, + [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, + [69] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, + [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, + [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, + [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, + [76] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, + [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, + [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, + [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, + [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, 
tgsi_unsupported}, + [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, + [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, + [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, + [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, + [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, + [88] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, + [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, + [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, + [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, + [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, + [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, + [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, + [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, + [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, + [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, + [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, + [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, + [104] = { ALU_OP0_NOP, tgsi_unsupported}, + [105] = { ALU_OP0_NOP, tgsi_unsupported}, + [106] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, + [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, + [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, + [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, + [112] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, + [114] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ + [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ + [118] = { ALU_OP0_NOP, tgsi_unsupported}, 
+ [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_f2i}, + [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, + [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, + [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, + [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, + [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, + [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, + [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, + [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_f2i}, + [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, + [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, + [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, + [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, + [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, + [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, + [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, + [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, + [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, + [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, + [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, + [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, + [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, + [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_POS] = { 0, 
tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, + [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, + [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, + [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, + [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, + [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, + [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, + [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, + [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, + [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, + [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, + [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, + [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, + [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, + [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, + [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, + [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, + [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, + 
[TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, + [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, + [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, + [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, + [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, }; -static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_eg_arl}, - {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2}, - {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit}, - {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_IEEE, cayman_emit_float_instr}, - {TGSI_OPCODE_RSQ, 0, ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr}, - {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp}, - {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log}, - {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2}, - {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2}, - {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst}, - {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2}, - {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2}, - {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap}, - {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2}, - {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, - {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, - {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, - {19, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, - {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, - {22, 0, ALU_OP0_NOP, tgsi_unsupported}, - {23, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2}, - {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2}, - {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, - {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, - {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, cayman_pow}, - 
{TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd}, - {32, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2}, - {34, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_COS, 0, ALU_OP1_COS, cayman_trig}, - {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, - {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, - {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ - {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {44, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2}, - {46, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2}, - {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, cayman_trig}, - {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap}, - {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2}, - {51, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex}, - {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {59, 0, ALU_OP0_NOP, tgsi_unsupported}, - {60, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl}, - {62, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg}, - {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp}, - {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs}, - {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, - {69, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DIV, 0, 
ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, - {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, - {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if}, - {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif}, - {76, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else}, - {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif}, - {TGSI_OPCODE_DDX_FINE, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, - {TGSI_OPCODE_DDY_FINE, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, - {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2}, - {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2}, - {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2}, - {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2}, - {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2}, - {88, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2}, - {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2}, - {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod}, - {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2}, - {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, - {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, - {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, - {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit}, - {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit}, - {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, - {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, - {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, - {104, 0, ALU_OP0_NOP, tgsi_unsupported}, - {105, 0, ALU_OP0_NOP, tgsi_unsupported}, - {106, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, - 
{TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2}, - {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2}, - {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap}, - {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap}, - {112, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, - {114, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ - {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */ - {118, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2}, - {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv}, - {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2}, - {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2}, - {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg}, - {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2}, - {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2}, - {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap}, - {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_op2}, - {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2}, - {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2}, - {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv}, - {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad}, - {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2}, - {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2}, - {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod}, - {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_INT, cayman_mul_int_instr}, - {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2}, - {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2}, - {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2}, - {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap}, - {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2}, - {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDSWITCH, 0, 
ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_eg_arl}, - {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp}, - {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, - {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, - {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, - {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, - {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, cayman_mul_int_instr}, - 
{TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, cayman_mul_int_instr}, - {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_tex}, - {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_tex}, - {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_op3}, - {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_op3}, - {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_bfi}, - {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_op2}, - {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_op2}, - {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_op2}, - {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_msb}, - {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_msb}, - {TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_interp_egcm}, - {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_interp_egcm}, - {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_interp_egcm}, - {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported}, +static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { + [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, + [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, + [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, + [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr}, + [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr}, + [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, + [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, + [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, + [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, + [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, + [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, + [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, + [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, + [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, + [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SQRT] = { 
ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, + [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, + [22] = { ALU_OP0_NOP, tgsi_unsupported}, + [23] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, + [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, + [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, + [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, + [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, + [TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow}, + [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, + [32] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, + [34] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig}, + [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, + [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, + [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ + [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, + [44] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, + [46] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, + [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, cayman_trig}, + [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, + [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, + [51] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, + [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, + [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, + [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, 
tgsi_unsupported}, + [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, + [59] = { ALU_OP0_NOP, tgsi_unsupported}, + [60] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, + [62] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, + [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, + [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, + [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, + [69] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, + [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, + [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, + [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, + [76] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, + [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, + [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, + [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, + [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, + [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2}, + [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, + [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, + [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, + [88] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, + [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, + [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, + [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, + [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, + [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, + 
[TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, + [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, + [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, + [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, + [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, + [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, + [104] = { ALU_OP0_NOP, tgsi_unsupported}, + [105] = { ALU_OP0_NOP, tgsi_unsupported}, + [106] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, + [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, + [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, + [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, + [112] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, + [114] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ + [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ + [118] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2}, + [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, + [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, + [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, + [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, + [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, + [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, + [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, + [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2}, + [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2}, + [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, + [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, + [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, + 
[TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, + [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, + [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, + [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_INT, cayman_mul_int_instr}, + [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, + [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, + [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, + [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, + [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, + [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, + [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, + [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, + [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, + [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, + [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMXCHG] = { 
ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, + [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, + [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, + [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, cayman_mul_int_instr}, + [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, cayman_mul_int_instr}, + [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, + [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, + [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, + [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, + [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, + [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, + [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, + [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, + [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, + [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, + [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, + [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, + [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, + [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, }; diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeon/cayman_msaa.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeon/cayman_msaa.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeon/cayman_msaa.c 2014-08-12 16:09:46.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeon/cayman_msaa.c 2015-03-18 08:43:35.000000000 +0000 
@@ -195,9 +195,12 @@ } void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples, - int ps_iter_samples) + int ps_iter_samples, int overrast_samples) { - if (nr_samples > 1) { + int setup_samples = nr_samples > 1 ? nr_samples : + overrast_samples > 1 ? overrast_samples : 0; + + if (setup_samples > 1) { /* indexed by log2(nr_samples) */ unsigned max_dist[] = { 0, @@ -206,8 +209,7 @@ cm_max_dist_8x, cm_max_dist_16x }; - - unsigned log_samples = util_logbase2(nr_samples); + unsigned log_samples = util_logbase2(setup_samples); unsigned log_ps_iter_samples = util_logbase2(util_next_power_of_two(ps_iter_samples)); @@ -218,15 +220,23 @@ S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */ - r600_write_context_reg(cs, CM_R_028804_DB_EQAA, - S_028804_MAX_ANCHOR_SAMPLES(log_samples) | - S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | - S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | - S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) | - S_028804_HIGH_QUALITY_INTERSECTIONS(1) | - S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); - r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, - EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1)); + if (nr_samples > 1) { + r600_write_context_reg(cs, CM_R_028804_DB_EQAA, + S_028804_MAX_ANCHOR_SAMPLES(log_samples) | + S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | + S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | + S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) | + S_028804_HIGH_QUALITY_INTERSECTIONS(1) | + S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); + r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, + EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1)); + } else if (overrast_samples > 1) { + r600_write_context_reg(cs, CM_R_028804_DB_EQAA, + S_028804_HIGH_QUALITY_INTERSECTIONS(1) | + S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) | + S_028804_OVERRASTERIZATION_AMOUNT(log_samples)); + r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0); + } } else { 
r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2); radeon_emit(cs, S_028BDC_LAST_PIXEL(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeon/r600d_common.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeon/r600d_common.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeon/r600d_common.h 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeon/r600d_common.h 2015-03-18 08:43:35.000000000 +0000 @@ -177,6 +177,8 @@ #define S_028804_INTERPOLATE_SRC_Z(x) (((x) & 0x1) << 19) #define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) & 0x1) << 20) #define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) & 0x1) << 21) +#define S_028804_OVERRASTERIZATION_AMOUNT(x) (((x) & 0x7) << 24) +#define S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) & 0x1) << 27) #define CM_R_028BDC_PA_SC_LINE_CNTL 0x28bdc #define S_028BDC_EXPAND_LINE_WIDTH(x) (((x) & 0x1) << 9) #define G_028BDC_EXPAND_LINE_WIDTH(x) (((x) >> 9) & 0x1) diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeon/r600_pipe_common.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeon/r600_pipe_common.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeon/r600_pipe_common.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeon/r600_pipe_common.h 2015-03-18 08:43:35.000000000 +0000 @@ -526,7 +526,7 @@ void cayman_init_msaa(struct pipe_context *ctx); void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples); void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples, - int ps_iter_samples); + int ps_iter_samples, int overrast_samples); /* Inline helpers. 
*/ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 2015-03-18 08:43:35.000000000 +0000 @@ -1224,6 +1224,7 @@ emit_data->dst_type, emit_data->args, emit_data->arg_count, attr); } + void build_tgsi_intrinsic_nomem( const struct lp_build_tgsi_action * action, @@ -1233,12 +1234,126 @@ build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute); } -static void build_tgsi_intrinsic_readonly( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) +static void emit_bfi(const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) { - build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadOnlyAttribute); + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef bfi_args[3]; + + // Calculate the bitmask: (((1 << src3) - 1) << src2 + bfi_args[0] = LLVMBuildShl(builder, + LLVMBuildSub(builder, + LLVMBuildShl(builder, + bld_base->int_bld.one, + emit_data->args[3], ""), + bld_base->int_bld.one, ""), + emit_data->args[2], ""); + + bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1], + emit_data->args[2], ""); + + bfi_args[2] = emit_data->args[0]; + + /* Calculate: + * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) + * Use the right-hand side, which the LLVM backend can convert to V_BFI. 
+ */ + emit_data->output[emit_data->chan] = + LLVMBuildXor(builder, bfi_args[2], + LLVMBuildAnd(builder, bfi_args[0], + LLVMBuildXor(builder, bfi_args[1], bfi_args[2], + ""), ""), ""); +} + +/* this is ffs in C */ +static void emit_lsb(const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMValueRef args[2] = { + emit_data->args[0], + + /* The value of 1 means that ffs(x=0) = undef, so LLVM won't + * add special code to check for x=0. The reason is that + * the LLVM behavior for x=0 is different from what we + * need here. + * + * The hardware already implements the correct behavior. + */ + lp_build_const_int32(gallivm, 1) + }; + + emit_data->output[emit_data->chan] = + build_intrinsic(gallivm->builder, "llvm.cttz.i32", + emit_data->dst_type, args, Elements(args), + LLVMReadNoneAttribute); +} + +/* Find the last bit set. */ +static void emit_umsb(const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef args[2] = { + emit_data->args[0], + /* Don't generate code for handling zero: */ + lp_build_const_int32(gallivm, 1) + }; + + LLVMValueRef msb = + build_intrinsic(builder, "llvm.ctlz.i32", + emit_data->dst_type, args, Elements(args), + LLVMReadNoneAttribute); + + /* The HW returns the last bit index from MSB, but TGSI wants + * the index from LSB. Invert it by doing "31 - msb". */ + msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31), + msb, ""); + + /* Check for zero: */ + emit_data->output[emit_data->chan] = + LLVMBuildSelect(builder, + LLVMBuildICmp(builder, LLVMIntEQ, args[0], + bld_base->uint_bld.zero, ""), + lp_build_const_int32(gallivm, -1), msb, ""); +} + +/* Find the last bit opposite of the sign bit. 
*/ +static void emit_imsb(const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef arg = emit_data->args[0]; + + LLVMValueRef msb = + build_intrinsic(builder, "llvm.AMDGPU.flbit.i32", + emit_data->dst_type, &arg, 1, + LLVMReadNoneAttribute); + + /* The HW returns the last bit index from MSB, but TGSI wants + * the index from LSB. Invert it by doing "31 - msb". */ + msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31), + msb, ""); + + /* If arg == 0 || arg == -1 (0xffffffff), return -1. */ + LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1); + + LLVMValueRef cond = + LLVMBuildOr(builder, + LLVMBuildICmp(builder, LLVMIntEQ, arg, + bld_base->uint_bld.zero, ""), + LLVMBuildICmp(builder, LLVMIntEQ, arg, + all_ones, ""), ""); + + emit_data->output[emit_data->chan] = + LLVMBuildSelect(builder, cond, all_ones, msb, ""); } void radeon_llvm_context_init(struct radeon_llvm_context * ctx) @@ -1295,20 +1410,23 @@ lp_set_default_actions(bld_base); - bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_readonly; + bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs"; - bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl; bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and; + bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl; + bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi; bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; + bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev"; bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit; - bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_readonly; + bld_base->op_actions[TGSI_OPCODE_CEIL].emit = 
build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil"; bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp."; bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt"; bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit; - bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_readonly; + bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32"; bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx"; bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args; @@ -1319,8 +1437,10 @@ bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp."; - bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_readonly; + bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "floor"; + bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32"; bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction."; bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i; @@ -1331,6 +1451,8 @@ bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp; bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs."; + bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32"; bld_base->op_actions[TGSI_OPCODE_IDIV].emit = 
emit_idiv; bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit; bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit; @@ -1338,6 +1460,7 @@ bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax"; bld_base->op_actions[TGSI_OPCODE_IMIN].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin"; + bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb; bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg; bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr; bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp; @@ -1348,14 +1471,18 @@ bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill"; bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp"; - bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_readonly; + bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb; + bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32"; bld_base->op_actions[TGSI_OPCODE_LRP].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_LRP].intr_name = "llvm.AMDGPU.lrp"; bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod; + bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb; bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not; bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or; - bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_readonly; + bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32"; + bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32"; bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest."; @@ 
-1366,8 +1493,10 @@ bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_cmp; bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_cmp; bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp; - bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_readonly; + bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32"; + bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32"; bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg; bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex"; @@ -1392,6 +1521,8 @@ bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc"; bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd; + bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32"; bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv; bld_base->op_actions[TGSI_OPCODE_UMAX].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax"; diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_hw_context.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_hw_context.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_hw_context.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_hw_context.c 2015-03-18 08:43:35.000000000 +0000 @@ -145,6 +145,7 @@ ctx->clip_regs.dirty = true; ctx->framebuffer.atom.dirty = true; + ctx->msaa_sample_locs.dirty = true; ctx->msaa_config.dirty = true; ctx->db_render_state.dirty = true; ctx->b.streamout.enable_atom.dirty = true; @@ -160,5 +161,6 @@ ctx->last_prim = -1; 
ctx->last_multi_vgt_param = -1; ctx->last_rast_prim = -1; + ctx->last_sc_line_stipple = ~0; ctx->emit_scratch_reloc = true; } diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_pipe.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_pipe.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_pipe.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_pipe.c 2015-03-18 08:43:35.000000000 +0000 @@ -112,6 +112,9 @@ sctx->cache_flush = si_atom_cache_flush; sctx->atoms.s.cache_flush = &sctx->cache_flush; + sctx->msaa_sample_locs = si_atom_msaa_sample_locs; + sctx->atoms.s.msaa_sample_locs = &sctx->msaa_sample_locs; + sctx->msaa_config = si_atom_msaa_config; sctx->atoms.s.msaa_config = &sctx->msaa_config; @@ -230,6 +233,7 @@ case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: @@ -264,7 +268,6 @@ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: - case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_FAKE_SW_MSAA: @@ -403,7 +406,7 @@ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: - return 0; + return 1; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: /* Indirection of geometry shader input dimension is not * handled yet @@ -426,6 +429,8 @@ case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: return 0; + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + return 1; } return 0; } diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_pipe.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_pipe.h --- 
mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_pipe.h 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_pipe.h 2015-03-18 08:43:35.000000000 +0000 @@ -41,12 +41,14 @@ * the number shouldn't be a commonly-used one. */ #define SI_BASE_VERTEX_UNKNOWN INT_MIN #define SI_RESTART_INDEX_UNKNOWN INT_MIN +#define SI_NUM_SMOOTH_AA_SAMPLES 8 #define SI_TRACE_CS 0 #define SI_TRACE_CS_DWORDS 6 #define SI_MAX_DRAW_CS_DWORDS \ - (/*derived prim state:*/ 6 + /*draw regs:*/ 16 + /*draw packets:*/ 31) + (/*scratch:*/ 3 + /*derived prim state:*/ 3 + \ + /*draw regs:*/ 16 + /*draw packets:*/ 31) /* Instruction cache. */ #define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0) @@ -151,6 +153,7 @@ struct r600_atom *streamout_begin; struct r600_atom *streamout_enable; /* must be after streamout_begin */ struct r600_atom *framebuffer; + struct r600_atom *msaa_sample_locs; struct r600_atom *db_render_state; struct r600_atom *msaa_config; struct r600_atom *clip_regs; @@ -160,8 +163,6 @@ struct si_framebuffer framebuffer; struct si_vertex_element *vertex_elements; - unsigned pa_sc_line_stipple; - unsigned pa_su_sc_mode_cntl; /* for saving when using blitter */ struct pipe_stencil_ref stencil_ref; /* shaders */ @@ -181,8 +182,10 @@ unsigned border_color_offset; struct r600_atom clip_regs; + struct r600_atom msaa_sample_locs; struct r600_atom msaa_config; int ps_iter_samples; + bool smoothing_enabled; /* Vertex and index buffers. 
*/ bool vertex_buffers_dirty; @@ -224,6 +227,7 @@ int last_prim; int last_multi_vgt_param; int last_rast_prim; + unsigned last_sc_line_stipple; int current_rast_prim; /* primitive type after TES, GS */ /* Scratch buffer */ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_shader.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_shader.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_shader.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_shader.c 2015-03-18 08:43:35.000000000 +0000 @@ -192,6 +192,30 @@ } /** + * Get the value of a shader input parameter and extract a bitfield. + */ +static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx, + unsigned param, unsigned rshift, + unsigned bitwidth) +{ + struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm; + LLVMValueRef value = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + param); + + if (rshift) + value = LLVMBuildLShr(gallivm->builder, value, + lp_build_const_int32(gallivm, rshift), ""); + + if (rshift + bitwidth < 32) { + unsigned mask = (1 << bitwidth) - 1; + value = LLVMBuildAnd(gallivm->builder, value, + lp_build_const_int32(gallivm, mask), ""); + } + + return value; +} + +/** * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad. * It's equivalent to doing a load from &base_ptr[index]. 
* @@ -561,14 +585,8 @@ static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld) { - struct gallivm_state *gallivm = &radeon_bld->gallivm; - LLVMValueRef value = LLVMGetParam(radeon_bld->main_fn, - SI_PARAM_ANCILLARY); - value = LLVMBuildLShr(gallivm->builder, value, - lp_build_const_int32(gallivm, 8), ""); - value = LLVMBuildAnd(gallivm->builder, value, - lp_build_const_int32(gallivm, 0xf), ""); - return value; + return unpack_param(si_shader_context(&radeon_bld->soa.bld_base), + SI_PARAM_ANCILLARY, 8, 4); } /** @@ -643,6 +661,15 @@ break; } + case TGSI_SEMANTIC_SAMPLEMASK: + /* Smoothing isn't MSAA in GL, but it's MSAA in hardware. + * Therefore, force gl_SampleMaskIn to 1 for GL. */ + if (si_shader_ctx->shader->key.ps.poly_line_smoothing) + value = uint_bld->one; + else + value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE); + break; + default: assert(!"unknown system value"); return; @@ -790,7 +817,7 @@ } static void si_alpha_test(struct lp_build_tgsi_context *bld_base, - LLVMValueRef *out_ptr) + LLVMValueRef alpha_ptr) { struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; @@ -802,7 +829,7 @@ LLVMValueRef alpha_pass = lp_build_cmp(&bld_base->base, si_shader_ctx->shader->key.ps.alpha_func, - LLVMBuildLoad(gallivm->builder, out_ptr[3], ""), + LLVMBuildLoad(gallivm->builder, alpha_ptr, ""), alpha_ref); LLVMValueRef arg = lp_build_select(&bld_base->base, @@ -824,6 +851,34 @@ si_shader_ctx->shader->db_shader_control |= S_02880C_KILL_ENABLE(1); } +static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base, + LLVMValueRef alpha_ptr) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMValueRef coverage, alpha; + + /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */ + coverage = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + 
SI_PARAM_SAMPLE_COVERAGE); + coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage); + + coverage = build_intrinsic(gallivm->builder, "llvm.ctpop.i32", + bld_base->int_bld.elem_type, + &coverage, 1, LLVMReadNoneAttribute); + + coverage = LLVMBuildUIToFP(gallivm->builder, coverage, + bld_base->base.elem_type, ""); + + coverage = LLVMBuildFMul(gallivm->builder, coverage, + lp_build_const_float(gallivm, + 1.0 / SI_NUM_SMOOTH_AA_SAMPLES), ""); + + alpha = LLVMBuildLoad(gallivm->builder, alpha_ptr, ""); + alpha = LLVMBuildFMul(gallivm->builder, alpha, coverage, ""); + LLVMBuildStore(gallivm->builder, alpha, alpha_ptr); +} + static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base, LLVMValueRef (*pos)[9], LLVMValueRef *out_elts) { @@ -976,16 +1031,9 @@ LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); - LLVMValueRef so_param = - LLVMGetParam(shader->radeon_bld.main_fn, - shader->param_streamout_config); - /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ LLVMValueRef so_vtx_count = - LLVMBuildAnd(builder, - LLVMBuildLShr(builder, so_param, - LLVMConstInt(i32, 16, 0), ""), - LLVMConstInt(i32, 127, 0), ""); + unpack_param(shader, shader->param_streamout_config, 16, 7); LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32, NULL, 0, LLVMReadNoneAttribute); @@ -1333,6 +1381,7 @@ unsigned semantic_name = info->output_semantic_name[i]; unsigned semantic_index = info->output_semantic_index[i]; unsigned target; + LLVMValueRef alpha_ptr; /* Select the correct target */ switch (semantic_name) { @@ -1347,15 +1396,18 @@ continue; case TGSI_SEMANTIC_COLOR: target = V_008DFC_SQ_EXP_MRT + semantic_index; + alpha_ptr = si_shader_ctx->radeon_bld.soa.outputs[i][3]; + if (si_shader_ctx->shader->key.ps.alpha_to_one) - LLVMBuildStore(bld_base->base.gallivm->builder, - bld_base->base.one, - si_shader_ctx->radeon_bld.soa.outputs[i][3]); + LLVMBuildStore(base->gallivm->builder, + base->one, alpha_ptr); if (semantic_index == 0 && 
si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) - si_alpha_test(bld_base, - si_shader_ctx->radeon_bld.soa.outputs[i]); + si_alpha_test(bld_base, alpha_ptr); + + if (si_shader_ctx->shader->key.ps.poly_line_smoothing) + si_scale_alpha_by_sample_mask(bld_base, alpha_ptr); break; default: target = 0; diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_shader.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_shader.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_shader.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_shader.h 2015-03-18 08:43:35.000000000 +0000 @@ -126,6 +126,7 @@ unsigned alpha_func:3; unsigned alpha_to_one:1; unsigned poly_stipple:1; + unsigned poly_line_smoothing:1; } ps; struct { unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_state.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_state.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_state.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_state.c 2015-03-18 08:43:35.000000000 +0000 @@ -605,7 +605,6 @@ struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); struct si_pm4_state *pm4 = &rs->pm4; unsigned tmp; - unsigned prov_vtx = 1, polygon_dual_mode; float psize_min, psize_max; if (rs == NULL) { @@ -617,29 +616,14 @@ rs->clip_plane_enable = state->clip_plane_enable; rs->line_stipple_enable = state->line_stipple_enable; rs->poly_stipple_enable = state->poly_stipple_enable; - - polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || - state->fill_back != PIPE_POLYGON_MODE_FILL); - - if (state->flatshade_first) - prov_vtx = 0; + rs->line_smooth = state->line_smooth; + rs->poly_smooth = state->poly_smooth; rs->flatshade = state->flatshade; rs->sprite_coord_enable = 
state->sprite_coord_enable; rs->pa_sc_line_stipple = state->line_stipple_enable ? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; - rs->pa_su_sc_mode_cntl = - S_028814_PROVOKING_VTX_LAST(prov_vtx) | - S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | - S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | - S_028814_POLY_MODE(polygon_dual_mode) | - S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | - S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)); rs->pa_cl_clip_cntl = S_028810_PS_UCP_MODE(3) | S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | @@ -686,7 +670,9 @@ si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | - S_028A48_MSAA_ENABLE(state->multisample) | + S_028A48_MSAA_ENABLE(state->multisample || + state->poly_smooth || + state->line_smooth) | S_028A48_VPORT_SCISSOR_ENABLE(state->scissor)); si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, @@ -694,7 +680,18 @@ S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); - + si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, + S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | + S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | + S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 
1 : 0) | + S_028814_FACE(!state->front_ccw) | + S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | + S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | + S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | + S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || + state->fill_back != PIPE_POLYGON_MODE_FILL) | + S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | + S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); return rs; } @@ -708,10 +705,6 @@ if (state == NULL) return; - // TODO - sctx->pa_sc_line_stipple = rs->pa_sc_line_stipple; - sctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl; - if (sctx->framebuffer.nr_samples > 1 && (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) sctx->db_render_state.dirty = true; @@ -720,7 +713,6 @@ si_update_fb_rs_state(sctx); sctx->clip_regs.dirty = true; - sctx->last_rast_prim = -1; /* reset this so that it gets updated */ } static void si_delete_rs_state(struct pipe_context *ctx, void *state) @@ -945,10 +937,15 @@ r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0); } - db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) | - S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) | + db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) | sctx->ps_db_shader_control; + /* Bug workaround for smoothing (overrasterization) on SI. */ + if (sctx->b.chip_class == SI && sctx->smoothing_enabled) + db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); + else + db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. 
*/ if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable)) db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; @@ -2093,6 +2090,19 @@ constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, SI_DRIVER_STATE_CONST_BUF, &constbuf); + + /* Smoothing (only possible with nr_samples == 1) uses the same + * sample locations as the MSAA it simulates. + * + * Therefore, don't update the sample locations when + * transitioning from no AA to smoothing-equivalent AA, and + * vice versa. + */ + if ((sctx->framebuffer.nr_samples != 1 || + old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) && + (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES || + old_nr_samples != 1)) + sctx->msaa_sample_locs.dirty = true; } } @@ -2196,17 +2206,29 @@ /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); +} + +static void si_emit_msaa_sample_locs(struct r600_common_context *rctx, + struct r600_atom *atom) +{ + struct si_context *sctx = (struct si_context *)rctx; + struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; + unsigned nr_samples = sctx->framebuffer.nr_samples; - cayman_emit_msaa_sample_locs(cs, sctx->framebuffer.nr_samples); + cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples : + SI_NUM_SMOOTH_AA_SAMPLES); } +const struct r600_atom si_atom_msaa_sample_locs = { si_emit_msaa_sample_locs, 18 }; /* number of CS dwords */ + static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom) { struct si_context *sctx = (struct si_context *)rctx; struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples, - sctx->ps_iter_samples); + sctx->ps_iter_samples, + sctx->smoothing_enabled ? 
SI_NUM_SMOOTH_AA_SAMPLES : 0); } const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_state_draw.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_state_draw.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_state_draw.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_state_draw.c 2015-03-18 08:43:35.000000000 +0000 @@ -149,27 +149,51 @@ S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0); } +static void si_emit_scratch_reloc(struct si_context *sctx) +{ + struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; + + if (!sctx->emit_scratch_reloc) + return; + + r600_write_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, + sctx->spi_tmpring_size); + + if (sctx->scratch_buffer) { + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, + sctx->scratch_buffer, RADEON_USAGE_READWRITE, + RADEON_PRIO_SHADER_RESOURCE_RW); + + } + sctx->emit_scratch_reloc = false; +} + /* rast_prim is the primitive type after GS. */ static void si_emit_rasterizer_prim_state(struct si_context *sctx) { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; unsigned rast_prim = sctx->current_rast_prim; + struct si_state_rasterizer *rs = sctx->emitted.named.rasterizer; - if (rast_prim == sctx->last_rast_prim) + /* Skip this if not rendering lines. */ + if (rast_prim != PIPE_PRIM_LINES && + rast_prim != PIPE_PRIM_LINE_LOOP && + rast_prim != PIPE_PRIM_LINE_STRIP && + rast_prim != PIPE_PRIM_LINES_ADJACENCY && + rast_prim != PIPE_PRIM_LINE_STRIP_ADJACENCY) + return; + + if (rast_prim == sctx->last_rast_prim && + rs->pa_sc_line_stipple == sctx->last_sc_line_stipple) return; r600_write_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE, - sctx->pa_sc_line_stipple | + rs->pa_sc_line_stipple | S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : rast_prim == PIPE_PRIM_LINE_STRIP ? 
2 : 0)); - r600_write_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, - sctx->pa_su_sc_mode_cntl | - S_028814_PROVOKING_VTX_LAST(rast_prim == PIPE_PRIM_QUADS || - rast_prim == PIPE_PRIM_QUAD_STRIP || - rast_prim == PIPE_PRIM_POLYGON)); - sctx->last_rast_prim = rast_prim; + sctx->last_sc_line_stipple = rs->pa_sc_line_stipple; } static void si_emit_draw_registers(struct si_context *sctx, @@ -570,20 +594,6 @@ if (sctx->b.flags) sctx->atoms.s.cache_flush->dirty = true; - if (sctx->emit_scratch_reloc) { - struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - r600_write_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, - sctx->spi_tmpring_size); - - if (sctx->scratch_buffer) { - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, - sctx->scratch_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW); - - } - sctx->emit_scratch_reloc = false; - } - si_need_cs_space(sctx, 0, TRUE); /* Emit states. */ @@ -595,6 +605,7 @@ } si_pm4_emit_dirty(sctx); + si_emit_scratch_reloc(sctx); si_emit_rasterizer_prim_state(sctx); si_emit_draw_registers(sctx, info); si_emit_draw_packets(sctx, info, &ib); diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_state.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_state.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_state.h 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_state.h 2015-03-18 08:43:35.000000000 +0000 @@ -62,12 +62,13 @@ bool line_stipple_enable; unsigned sprite_coord_enable; unsigned pa_sc_line_stipple; - unsigned pa_su_sc_mode_cntl; unsigned pa_cl_clip_cntl; unsigned clip_plane_enable; float offset_units; float offset_scale; bool poly_stipple_enable; + bool line_smooth; + bool poly_smooth; }; struct si_state_dsa { @@ -269,6 +270,7 @@ /* si_state_draw.c */ extern const struct r600_atom si_atom_cache_flush; +extern const struct r600_atom si_atom_msaa_sample_locs; extern const struct r600_atom 
si_atom_msaa_config; void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *atom); void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo); diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_state_shaders.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_state_shaders.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/radeonsi/si_state_shaders.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/radeonsi/si_state_shaders.c 2015-03-18 08:43:35.000000000 +0000 @@ -366,33 +366,38 @@ key->vs.gs_used_inputs = sctx->gs_shader->gs_used_inputs; } } else if (sel->type == PIPE_SHADER_FRAGMENT) { + struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; + if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; key->ps.export_16bpc = sctx->framebuffer.export_16bpc; - if (sctx->queued.named.rasterizer) { - key->ps.color_two_side = sctx->queued.named.rasterizer->two_side; + if (rs) { + bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES && + sctx->current_rast_prim <= PIPE_PRIM_POLYGON) || + sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY; + bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS; + + key->ps.color_two_side = rs->two_side; if (sctx->queued.named.blend) { key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one && - sctx->queued.named.rasterizer->multisample_enable && + rs->multisample_enable && !sctx->framebuffer.cb0_is_integer; } - key->ps.poly_stipple = sctx->queued.named.rasterizer->poly_stipple_enable && - ((sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES && - sctx->current_rast_prim <= PIPE_PRIM_POLYGON) || - sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY); + key->ps.poly_stipple = rs->poly_stipple_enable && is_poly; + key->ps.poly_line_smoothing = ((is_poly && rs->poly_smooth) || + 
(is_line && rs->line_smooth)) && + sctx->framebuffer.nr_samples <= 1; } - if (sctx->queued.named.dsa) { - key->ps.alpha_func = sctx->queued.named.dsa->alpha_func; - /* Alpha-test should be disabled if colorbuffer 0 is integer. */ - if (sctx->framebuffer.cb0_is_integer) - key->ps.alpha_func = PIPE_FUNC_ALWAYS; - } else { - key->ps.alpha_func = PIPE_FUNC_ALWAYS; - } + key->ps.alpha_func = PIPE_FUNC_ALWAYS; + + /* Alpha-test should be disabled if colorbuffer 0 is integer. */ + if (sctx->queued.named.dsa && + !sctx->framebuffer.cb0_is_integer) + key->ps.alpha_func = sctx->queued.named.dsa->alpha_func; } } @@ -921,6 +926,14 @@ sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control; sctx->db_render_state.dirty = true; } + + if (sctx->smoothing_enabled != sctx->ps_shader->current->key.ps.poly_line_smoothing) { + sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing; + sctx->msaa_config.dirty = true; + + if (sctx->b.chip_class == SI) + sctx->db_render_state.dirty = true; + } } void si_init_shader_functions(struct si_context *sctx) diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/svga/svga_screen.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/svga/svga_screen.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/svga/svga_screen.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/svga/svga_screen.c 2015-03-18 08:43:35.000000000 +0000 @@ -375,6 +375,7 @@ case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; } /* If we get here, we failed to handle a cap above */ @@ -431,6 +432,7 @@ case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; } /* If we get here, we failed to handle a cap above */ diff -Nru 
mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/vc4/vc4_screen.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/vc4/vc4_screen.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/drivers/vc4/vc4_screen.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/drivers/vc4/vc4_screen.c 2015-03-18 08:43:35.000000000 +0000 @@ -319,6 +319,7 @@ case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/include/pipe/p_defines.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/include/pipe/p_defines.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/include/pipe/p_defines.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/include/pipe/p_defines.h 2015-03-18 08:43:35.000000000 +0000 @@ -644,6 +644,7 @@ PIPE_SHADER_CAP_DOUBLES, PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */ PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED, + PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED, }; /** diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/include/pipe/p_shader_tokens.h mesa-10.6.0~git20150318.27bf37ba/src/gallium/include/pipe/p_shader_tokens.h --- mesa-10.6.0~git20150310.5750595c/src/gallium/include/pipe/p_shader_tokens.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/include/pipe/p_shader_tokens.h 2015-03-18 08:43:35.000000000 +0000 @@ -306,7 +306,7 @@ #define TGSI_OPCODE_MAD 16 #define TGSI_OPCODE_SUB 17 #define TGSI_OPCODE_LRP 18 - /* gap */ +#define TGSI_OPCODE_FMA 19 #define TGSI_OPCODE_SQRT 20 #define TGSI_OPCODE_DP2A 21 /* gap */ @@ -404,7 +404,7 @@ #define TGSI_OPCODE_BREAKC 115 #define TGSI_OPCODE_KILL_IF 116 /* conditional kill */ #define TGSI_OPCODE_END 117 /* aka HALT */ - /* gap */ +#define 
TGSI_OPCODE_DFMA 118 #define TGSI_OPCODE_F2I 119 #define TGSI_OPCODE_IDIV 120 #define TGSI_OPCODE_IMAX 121 @@ -510,7 +510,7 @@ #define TGSI_OPCODE_DSNE 206 /* SM5 */ #define TGSI_OPCODE_DRCP 207 /* eg, cayman */ #define TGSI_OPCODE_DSQRT 208 /* eg, cayman also has DRSQ */ -#define TGSI_OPCODE_DMAD 209 /* DFMA? */ +#define TGSI_OPCODE_DMAD 209 #define TGSI_OPCODE_DFRAC 210 /* eg, cayman */ #define TGSI_OPCODE_DLDEXP 211 /* eg, cayman */ #define TGSI_OPCODE_DFRACEXP 212 /* eg, cayman */ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/state_trackers/dri/Makefile.am mesa-10.6.0~git20150318.27bf37ba/src/gallium/state_trackers/dri/Makefile.am --- mesa-10.6.0~git20150310.5750595c/src/gallium/state_trackers/dri/Makefile.am 2015-01-14 13:02:07.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/state_trackers/dri/Makefile.am 2015-03-18 08:43:35.000000000 +0000 @@ -30,7 +30,6 @@ -I$(top_srcdir)/include \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa \ - -I$(top_srcdir)/src/gallium/state_trackers/dri/common \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ -I$(top_builddir)/src/mesa/drivers/dri/common \ $(GALLIUM_CFLAGS) \ diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/state_trackers/dri/SConscript mesa-10.6.0~git20150318.27bf37ba/src/gallium/state_trackers/dri/SConscript --- mesa-10.6.0~git20150310.5750595c/src/gallium/state_trackers/dri/SConscript 2014-09-10 05:44:12.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/state_trackers/dri/SConscript 2015-03-18 08:43:35.000000000 +0000 @@ -14,7 +14,6 @@ '#/src', '#/src/mapi', '#/src/mesa', - '#/src/gallium/state_trackers/dri/common', '#/src/mesa/drivers/dri/common', xmlpool_options.dir.dir, # Dir to generated xmlpool/options.h ]) diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/state_trackers/glx/xlib/glx_api.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/state_trackers/glx/xlib/glx_api.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/state_trackers/glx/xlib/glx_api.c 
2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/state_trackers/glx/xlib/glx_api.c 2015-03-18 08:43:35.000000000 +0000 @@ -34,6 +34,7 @@ #include "GL/glx.h" #include +#include #include #include @@ -644,7 +645,7 @@ ext = dpy->ext_procs; /* new extension is at head of list */ assert(c->extension == ext->codes.extension); (void) c; - ext->name = _mesa_strdup(extName); + ext->name = strdup(extName); ext->close_display = close_display_callback; } } diff -Nru mesa-10.6.0~git20150310.5750595c/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c mesa-10.6.0~git20150318.27bf37ba/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c --- mesa-10.6.0~git20150310.5750595c/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c 2015-03-18 08:43:35.000000000 +0000 @@ -51,16 +51,14 @@ #include "state_tracker/sw_winsys.h" #include "state_tracker/drm_driver.h" +#include "kms_dri_sw_winsys.h" -#if 0 +#ifdef DEBUG #define DEBUG_PRINT(msg, ...) fprintf(stderr, msg, __VA_ARGS__) #else #define DEBUG_PRINT(msg, ...) 
#endif -struct sw_winsys; - -struct sw_winsys *kms_dri_create_winsys(int fd); struct kms_sw_displaytarget { diff -Nru mesa-10.6.0~git20150310.5750595c/src/glsl/builtin_functions.cpp mesa-10.6.0~git20150318.27bf37ba/src/glsl/builtin_functions.cpp --- mesa-10.6.0~git20150310.5750595c/src/glsl/builtin_functions.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glsl/builtin_functions.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -201,7 +201,7 @@ shader_packing_or_es3(const _mesa_glsl_parse_state *state) { return state->ARB_shading_language_packing_enable || - state->is_version(400, 300); + state->is_version(420, 300); } static bool diff -Nru mesa-10.6.0~git20150310.5750595c/src/glsl/list.h mesa-10.6.0~git20150318.27bf37ba/src/glsl/list.h --- mesa-10.6.0~git20150310.5750595c/src/glsl/list.h 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glsl/list.h 2015-03-18 08:43:35.000000000 +0000 @@ -684,7 +684,7 @@ exec_node_data(__type, (__list)->head, __field), \ * __next = \ exec_node_data(__type, (__node)->__field.next, __field); \ - __next != NULL; \ + (__node)->__field.next != NULL; \ __node = __next, __next = \ exec_node_data(__type, (__next)->__field.next, __field)) @@ -693,7 +693,7 @@ exec_node_data(__type, (__list)->tail_pred, __field), \ * __prev = \ exec_node_data(__type, (__node)->__field.prev, __field); \ - __prev != NULL; \ + (__node)->__field.prev != NULL; \ __node = __prev, __prev = \ exec_node_data(__type, (__prev)->__field.prev, __field)) diff -Nru mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_intrinsics.h mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_intrinsics.h --- mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_intrinsics.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_intrinsics.h 2015-03-18 08:43:35.000000000 +0000 @@ -95,6 +95,8 @@ SYSTEM_VALUE(front_face, 1) SYSTEM_VALUE(vertex_id, 1) +SYSTEM_VALUE(vertex_id_zero_base, 1) 
+SYSTEM_VALUE(base_vertex, 1) SYSTEM_VALUE(instance_id, 1) SYSTEM_VALUE(sample_id, 1) SYSTEM_VALUE(sample_pos, 2) diff -Nru mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_lower_system_values.c mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_lower_system_values.c --- mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_lower_system_values.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_lower_system_values.c 2015-03-18 08:43:35.000000000 +0000 @@ -49,6 +49,12 @@ case SYSTEM_VALUE_VERTEX_ID: op = nir_intrinsic_load_vertex_id; break; + case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: + op = nir_intrinsic_load_vertex_id_zero_base; + break; + case SYSTEM_VALUE_BASE_VERTEX: + op = nir_intrinsic_load_base_vertex; + break; case SYSTEM_VALUE_INSTANCE_ID: op = nir_intrinsic_load_instance_id; break; diff -Nru mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_lower_vars_to_ssa.c mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_lower_vars_to_ssa.c --- mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_lower_vars_to_ssa.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_lower_vars_to_ssa.c 2015-03-18 08:43:35.000000000 +0000 @@ -35,6 +35,13 @@ bool lower_to_ssa; + /* Only valid for things that end up in the direct list. + * Note that multiple nir_deref_vars may correspond to this node, but they + * will all be equivalent, so any is as good as the other. + */ + nir_deref_var *deref; + struct exec_node direct_derefs_link; + struct set *loads; struct set *stores; struct set *copies; @@ -69,7 +76,7 @@ * wildcards and no indirects, these are precisely the derefs that we * can actually consider lowering. */ - struct hash_table *direct_deref_nodes; + struct exec_list direct_deref_nodes; /* Controls whether get_deref_node will add variables to the * direct_deref_nodes table. 
This is turned on when we are initially @@ -83,88 +90,6 @@ struct hash_table *phi_table; }; -/* The following two functions implement a hash and equality check for - * variable dreferences. When the hash or equality function encounters an - * array, all indirects are treated as equal and are never equal to a - * direct dereference or a wildcard. - */ -static uint32_t -hash_deref(const void *void_deref) -{ - uint32_t hash = _mesa_fnv32_1a_offset_bias; - - const nir_deref_var *deref_var = void_deref; - hash = _mesa_fnv32_1a_accumulate(hash, deref_var->var); - - for (const nir_deref *deref = deref_var->deref.child; - deref; deref = deref->child) { - switch (deref->deref_type) { - case nir_deref_type_array: { - nir_deref_array *deref_array = nir_deref_as_array(deref); - - hash = _mesa_fnv32_1a_accumulate(hash, deref_array->deref_array_type); - - if (deref_array->deref_array_type == nir_deref_array_type_direct) - hash = _mesa_fnv32_1a_accumulate(hash, deref_array->base_offset); - break; - } - case nir_deref_type_struct: { - nir_deref_struct *deref_struct = nir_deref_as_struct(deref); - hash = _mesa_fnv32_1a_accumulate(hash, deref_struct->index); - break; - } - default: - assert("Invalid deref chain"); - } - } - - return hash; -} - -static bool -derefs_equal(const void *void_a, const void *void_b) -{ - const nir_deref_var *a_var = void_a; - const nir_deref_var *b_var = void_b; - - if (a_var->var != b_var->var) - return false; - - for (const nir_deref *a = a_var->deref.child, *b = b_var->deref.child; - a != NULL; a = a->child, b = b->child) { - if (a->deref_type != b->deref_type) - return false; - - switch (a->deref_type) { - case nir_deref_type_array: { - nir_deref_array *a_arr = nir_deref_as_array(a); - nir_deref_array *b_arr = nir_deref_as_array(b); - - if (a_arr->deref_array_type != b_arr->deref_array_type) - return false; - - if (a_arr->deref_array_type == nir_deref_array_type_direct && - a_arr->base_offset != b_arr->base_offset) - return false; - break; - } - case 
nir_deref_type_struct: - if (nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index) - return false; - break; - default: - assert("Invalid deref chain"); - return false; - } - - assert((a->child == NULL) == (b->child == NULL)); - if((a->child == NULL) != (b->child == NULL)) - return false; - } - - return true; -} - static int type_get_length(const struct glsl_type *type) { @@ -195,6 +120,8 @@ struct deref_node *node = rzalloc_size(mem_ctx, size); node->type = type; node->parent = parent; + node->deref = NULL; + exec_node_init(&node->direct_derefs_link); return node; } @@ -297,8 +224,14 @@ assert(node); - if (is_direct && state->add_to_direct_deref_nodes) - _mesa_hash_table_insert(state->direct_deref_nodes, deref, node); + /* Only insert if it isn't already in the list. */ + if (is_direct && state->add_to_direct_deref_nodes && + node->direct_derefs_link.next == NULL) { + node->deref = deref; + assert(deref->var != NULL); + exec_list_push_tail(&state->direct_deref_nodes, + &node->direct_derefs_link); + } return node; } @@ -917,10 +850,8 @@ unsigned w_start, w_end; unsigned iter_count = 0; - struct hash_entry *deref_entry; - hash_table_foreach(state->direct_deref_nodes, deref_entry) { - struct deref_node *node = deref_entry->data; - + foreach_list_typed(struct deref_node, node, direct_derefs_link, + &state->direct_deref_nodes) { if (node->stores == NULL) continue; @@ -1014,8 +945,7 @@ state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); - state.direct_deref_nodes = _mesa_hash_table_create(state.dead_ctx, - hash_deref, derefs_equal); + exec_list_make_empty(&state.direct_deref_nodes); state.phi_table = _mesa_hash_table_create(state.dead_ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -1035,18 +965,17 @@ /* We're about to iterate through direct_deref_nodes. Don't modify it. 
*/ state.add_to_direct_deref_nodes = false; - struct hash_entry *entry; - hash_table_foreach(state.direct_deref_nodes, entry) { - nir_deref_var *deref = (void *)entry->key; - struct deref_node *node = entry->data; + foreach_list_typed_safe(struct deref_node, node, direct_derefs_link, + &state.direct_deref_nodes) { + nir_deref_var *deref = node->deref; if (deref->var->data.mode != nir_var_local) { - _mesa_hash_table_remove(state.direct_deref_nodes, entry); + exec_node_remove(&node->direct_derefs_link); continue; } if (deref_may_be_aliased(deref, &state)) { - _mesa_hash_table_remove(state.direct_deref_nodes, entry); + exec_node_remove(&node->direct_derefs_link); continue; } diff -Nru mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_opt_algebraic.py mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_opt_algebraic.py --- mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_opt_algebraic.py 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_opt_algebraic.py 2015-03-18 08:43:35.000000000 +0000 @@ -56,6 +56,10 @@ (('iabs', ('ineg', a)), ('iabs', a)), (('fadd', a, 0.0), a), (('iadd', a, 0), a), + (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), + (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))), + (('fadd', ('fneg', a), a), 0.0), + (('iadd', ('ineg', a), a), 0), (('fmul', a, 0.0), 0.0), (('imul', a, 0), 0), (('fmul', a, 1.0), a), diff -Nru mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_opt_peephole_select.c mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_opt_peephole_select.c --- mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_opt_peephole_select.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_opt_peephole_select.c 2015-03-18 08:43:35.000000000 +0000 @@ -52,36 +52,66 @@ }; static bool -are_all_move_to_phi(nir_block *block) +block_check_for_allowed_instrs(nir_block *block) { nir_foreach_instr(block, instr) { - if (instr->type != 
nir_instr_type_alu) - return false; + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: + switch (intrin->variables[0]->var->data.mode) { + case nir_var_shader_in: + case nir_var_uniform: + break; + + default: + return false; + } + break; - /* It must be a move operation */ - nir_alu_instr *mov = nir_instr_as_alu(instr); - if (mov->op != nir_op_fmov && mov->op != nir_op_imov) - return false; + default: + return false; + } - /* Can't handle saturate */ - if (mov->dest.saturate) - return false; + break; + } - /* It must be SSA */ - if (!mov->dest.dest.is_ssa) - return false; + case nir_instr_type_load_const: + break; - /* It cannot have any if-uses */ - if (mov->dest.dest.ssa.if_uses->entries != 0) - return false; + case nir_instr_type_alu: { + /* It must be a move operation */ + nir_alu_instr *mov = nir_instr_as_alu(instr); + if (mov->op != nir_op_fmov && mov->op != nir_op_imov) + return false; + + /* Can't handle saturate */ + if (mov->dest.saturate) + return false; + + /* It must be SSA */ + if (!mov->dest.dest.is_ssa) + return false; - /* The only uses of this definition must be phi's in the successor */ - struct set_entry *entry; - set_foreach(mov->dest.dest.ssa.uses, entry) { - const nir_instr *dest_instr = entry->key; - if (dest_instr->type != nir_instr_type_phi || - dest_instr->block != block->successors[0]) + /* It cannot have any if-uses */ + if (mov->dest.dest.ssa.if_uses->entries != 0) return false; + + /* The only uses of this definition must be phi's in the successor */ + struct set_entry *entry; + set_foreach(mov->dest.dest.ssa.uses, entry) { + const nir_instr *dest_instr = entry->key; + if (dest_instr->type != nir_instr_type_phi || + dest_instr->block != block->successors[0]) + return false; + } + break; + } + + default: + return false; } } @@ -119,8 +149,9 @@ nir_block *then_block = nir_cf_node_as_block(then_node); 
nir_block *else_block = nir_cf_node_as_block(else_node); - /* ... and those blocks must only contain move-to-phi. */ - if (!are_all_move_to_phi(then_block) || !are_all_move_to_phi(else_block)) + /* ... and those blocks must only contain "allowed" instructions. */ + if (!block_check_for_allowed_instrs(then_block) || + !block_check_for_allowed_instrs(else_block)) return true; /* At this point, we know that the previous CFG node is an if-then @@ -129,6 +160,25 @@ * selects. */ + nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node)); + assert(prev_block->cf_node.type == nir_cf_node_block); + + /* First, we move the remaining instructions from the blocks to the + * block before. We have already guaranteed that this is safe by + * calling block_check_for_allowed_instrs() + */ + nir_foreach_instr_safe(then_block, instr) { + exec_node_remove(&instr->node); + instr->block = prev_block; + exec_list_push_tail(&prev_block->instr_list, &instr->node); + } + + nir_foreach_instr_safe(else_block, instr) { + exec_node_remove(&instr->node); + instr->block = prev_block; + exec_list_push_tail(&prev_block->instr_list, &instr->node); + } + nir_foreach_instr_safe(block, instr) { if (instr->type != nir_instr_type_phi) break; @@ -145,19 +195,7 @@ assert(src->src.is_ssa); unsigned idx = src->pred == then_block ? 1 : 2; - - if (src->src.ssa->parent_instr->block == src->pred) { - /* We already know that this instruction must be a move with - * this phi's in this block as its only users. 
- */ - nir_alu_instr *mov = nir_instr_as_alu(src->src.ssa->parent_instr); - assert(mov->instr.type == nir_instr_type_alu); - assert(mov->op == nir_op_fmov || mov->op == nir_op_imov); - - nir_alu_src_copy(&sel->src[idx], &mov->src[0], state->mem_ctx); - } else { - nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx); - } + nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx); } nir_ssa_dest_init(&sel->instr, &sel->dest.dest, diff -Nru mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_worklist.c mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_worklist.c --- mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_worklist.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_worklist.c 2015-03-18 08:43:35.000000000 +0000 @@ -82,7 +82,7 @@ } nir_block * -nir_block_worklist_peek_head(nir_block_worklist *w) +nir_block_worklist_peek_head(const nir_block_worklist *w) { assert(w->count > 0); @@ -114,18 +114,18 @@ w->count++; - unsigned tail = w->start = (w->start + w->count - 1) % w->size; + unsigned tail = (w->start + w->count - 1) % w->size; w->blocks[tail] = block; BITSET_SET(w->blocks_present, block->index); } nir_block * -nir_block_worklist_peek_tail(nir_block_worklist *w) +nir_block_worklist_peek_tail(const nir_block_worklist *w) { assert(w->count > 0); - unsigned tail = w->start = (w->start + w->count - 1) % w->size; + unsigned tail = (w->start + w->count - 1) % w->size; return w->blocks[tail]; } @@ -135,7 +135,7 @@ { assert(w->count > 0); - unsigned tail = w->start = (w->start + w->count - 1) % w->size; + unsigned tail = (w->start + w->count - 1) % w->size; w->count--; diff -Nru mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_worklist.h mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_worklist.h --- mesa-10.6.0~git20150310.5750595c/src/glsl/nir/nir_worklist.h 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glsl/nir/nir_worklist.h 2015-03-18 08:43:35.000000000 +0000 @@ -74,13 +74,13 @@ 
void nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block); -nir_block *nir_block_worklist_peek_head(nir_block_worklist *w); +nir_block *nir_block_worklist_peek_head(const nir_block_worklist *w); nir_block *nir_block_worklist_pop_head(nir_block_worklist *w); void nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block); -nir_block *nir_block_worklist_peek_tail(nir_block_worklist *w); +nir_block *nir_block_worklist_peek_tail(const nir_block_worklist *w); nir_block *nir_block_worklist_pop_tail(nir_block_worklist *w); diff -Nru mesa-10.6.0~git20150310.5750595c/src/glsl/opt_algebraic.cpp mesa-10.6.0~git20150318.27bf37ba/src/glsl/opt_algebraic.cpp --- mesa-10.6.0~git20150310.5750595c/src/glsl/opt_algebraic.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glsl/opt_algebraic.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -626,9 +626,18 @@ if (!is_vec_zero(zero)) continue; - return new(mem_ctx) ir_expression(ir->operation, - add->operands[0], - neg(add->operands[1])); + /* Depending of the zero position we want to optimize + * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y) + */ + if (add_pos == 1) { + return new(mem_ctx) ir_expression(ir->operation, + neg(add->operands[0]), + add->operands[1]); + } else { + return new(mem_ctx) ir_expression(ir->operation, + add->operands[0], + neg(add->operands[1])); + } } break; diff -Nru mesa-10.6.0~git20150310.5750595c/src/glx/glxclient.h mesa-10.6.0~git20150318.27bf37ba/src/glx/glxclient.h --- mesa-10.6.0~git20150310.5750595c/src/glx/glxclient.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glx/glxclient.h 2015-03-18 08:43:35.000000000 +0000 @@ -48,12 +48,10 @@ #include #include #include +#include #include "GL/glxproto.h" #include "glxconfig.h" #include "glxhash.h" -#if defined( HAVE_PTHREAD ) -# include -#endif #include "util/macros.h" #include "glxextensions.h" @@ -629,7 +627,6 @@ extern int __glXDebug; /* This is per-thread storage in 
an MT environment */ -#if defined( HAVE_PTHREAD ) extern void __glXSetCurrentContext(struct glx_context * c); @@ -646,14 +643,6 @@ # endif /* defined( GLX_USE_TLS ) */ -#else - -extern struct glx_context *__glXcurrentContext; -#define __glXGetCurrentContext() __glXcurrentContext -#define __glXSetCurrentContext(gc) __glXcurrentContext = gc - -#endif /* defined( HAVE_PTHREAD ) */ - extern void __glXSetCurrentContextNull(void); @@ -661,14 +650,9 @@ ** Global lock for all threads in this address space using the GLX ** extension */ -#if defined( HAVE_PTHREAD ) extern pthread_mutex_t __glXmutex; #define __glXLock() pthread_mutex_lock(&__glXmutex) #define __glXUnlock() pthread_mutex_unlock(&__glXmutex) -#else -#define __glXLock() -#define __glXUnlock() -#endif /* ** Setup for a command. Initialize the extension for dpy if necessary. diff -Nru mesa-10.6.0~git20150310.5750595c/src/glx/glxcurrent.c mesa-10.6.0~git20150318.27bf37ba/src/glx/glxcurrent.c --- mesa-10.6.0~git20150310.5750595c/src/glx/glxcurrent.c 2014-08-12 16:09:46.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glx/glxcurrent.c 2015-03-18 08:43:35.000000000 +0000 @@ -33,9 +33,7 @@ * Client-side GLX interface for current context management. */ -#ifdef HAVE_PTHREAD #include -#endif #include "glxclient.h" @@ -67,8 +65,6 @@ * Current context management and locking */ -#if defined( HAVE_PTHREAD ) - _X_HIDDEN pthread_mutex_t __glXmutex = PTHREAD_MUTEX_INITIALIZER; # if defined( GLX_USE_TLS ) @@ -138,17 +134,6 @@ # endif /* defined( GLX_USE_TLS ) */ -#elif defined( THREADS ) - -#error Unknown threading method specified. 
- -#else - -/* not thread safe */ -_X_HIDDEN struct glx_context *__glXcurrentContext = &dummyContext; - -#endif - _X_HIDDEN void __glXSetCurrentContextNull(void) diff -Nru mesa-10.6.0~git20150310.5750595c/src/glx/glxext.c mesa-10.6.0~git20150318.27bf37ba/src/glx/glxext.c --- mesa-10.6.0~git20150310.5750595c/src/glx/glxext.c 2014-08-12 16:09:46.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glx/glxext.c 2015-03-18 08:43:35.000000000 +0000 @@ -143,8 +143,13 @@ aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo; aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo; - if (awire->sbc < glxDraw->lastEventSbc) - glxDraw->eventSbcWrap += 0x100000000; + /* Handle 32-Bit wire sbc wraparound in both directions to cope with out + * of sequence 64-Bit sbc's + */ + if ((int64_t) awire->sbc < ((int64_t) glxDraw->lastEventSbc - 0x40000000)) + glxDraw->eventSbcWrap += 0x100000000; + if ((int64_t) awire->sbc > ((int64_t) glxDraw->lastEventSbc + 0x40000000)) + glxDraw->eventSbcWrap -= 0x100000000; glxDraw->lastEventSbc = awire->sbc; aevent->sbc = awire->sbc + glxDraw->eventSbcWrap; return True; diff -Nru mesa-10.6.0~git20150310.5750595c/src/glx/tests/fake_glx_screen.cpp mesa-10.6.0~git20150318.27bf37ba/src/glx/tests/fake_glx_screen.cpp --- mesa-10.6.0~git20150310.5750595c/src/glx/tests/fake_glx_screen.cpp 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/glx/tests/fake_glx_screen.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -77,7 +77,7 @@ __thread void *__glX_tls_Context = NULL; -#if defined(HAVE_PTHREAD) && !defined(GLX_USE_TLS) +#if !defined(GLX_USE_TLS) extern "C" struct glx_context * __glXGetCurrentContext() { diff -Nru mesa-10.6.0~git20150310.5750595c/src/loader/loader.c mesa-10.6.0~git20150318.27bf37ba/src/loader/loader.c --- mesa-10.6.0~git20150310.5750595c/src/loader/loader.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/loader/loader.c 2015-03-18 08:43:35.000000000 +0000 @@ -64,6 +64,7 @@ * Rob 
Clark */ +#include #include #include #include @@ -80,7 +81,6 @@ #endif #endif #ifdef HAVE_SYSFS -#include #include #endif #include "loader.h" diff -Nru mesa-10.6.0~git20150310.5750595c/src/mapi/glapi/gen/ARB_direct_state_access.xml mesa-10.6.0~git20150318.27bf37ba/src/mapi/glapi/gen/ARB_direct_state_access.xml --- mesa-10.6.0~git20150310.5750595c/src/mapi/glapi/gen/ARB_direct_state_access.xml 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mapi/glapi/gen/ARB_direct_state_access.xml 2015-03-18 08:43:35.000000000 +0000 @@ -7,6 +7,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-10.6.0~git20150310.5750595c/src/mapi/glapi/gen/gl_enums.py mesa-10.6.0~git20150318.27bf37ba/src/mapi/glapi/gen/gl_enums.py --- mesa-10.6.0~git20150310.5750595c/src/mapi/glapi/gen/gl_enums.py 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mapi/glapi/gen/gl_enums.py 2015-03-18 08:43:35.000000000 +0000 @@ -157,6 +157,12 @@ string_offsets = {} i = 0; + print '#if defined(__GNUC__)' + print '# define LONGSTRING __extension__' + print '#else' + print '# define LONGSTRING' + print '#endif' + print '' print 'LONGSTRING static const char enum_string_table[] = ' for enum, name in enum_table: print ' "%s\\0"' % (name) diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/common/meta_tex_subimage.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/common/meta_tex_subimage.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/common/meta_tex_subimage.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/common/meta_tex_subimage.c 2015-03-18 08:43:35.000000000 +0000 @@ -150,9 +150,6 @@ bool success = false; int z; - /* XXX: This should probably be passed in from somewhere */ - const char *where = "_mesa_meta_pbo_TexSubImage"; 
- if (!_mesa_is_bufferobj(packing->BufferObj) && !create_pbo) return false; @@ -165,19 +162,6 @@ if (ctx->_ImageTransferState) return false; - if (!_mesa_validate_pbo_access(dims, packing, width, height, depth, - format, type, INT_MAX, pixels)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(out of bounds PBO access)", where); - return true; - } - - if (_mesa_check_disallowed_mapping(packing->BufferObj)) { - /* buffer is mapped - that's an error */ - _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)", where); - return true; - } - /* For arrays, use a tall (height * depth) 2D texture but taking into * account the inter-image padding specified with the image height packing * property. @@ -277,9 +261,6 @@ bool success = false; int z; - /* XXX: This should probably be passed in from somewhere */ - const char *where = "_mesa_meta_pbo_GetTexSubImage"; - if (!_mesa_is_bufferobj(packing->BufferObj)) return false; @@ -292,19 +273,6 @@ if (ctx->_ImageTransferState) return false; - if (!_mesa_validate_pbo_access(dims, packing, width, height, depth, - format, type, INT_MAX, pixels)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(out of bounds PBO access)", where); - return true; - } - - if (_mesa_check_disallowed_mapping(packing->BufferObj)) { - /* buffer is mapped - that's an error */ - _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)", where); - return true; - } - /* For arrays, use a tall (height * depth) 2D texture but taking into * account the inter-image padding specified with the image height packing * property. 
diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i915/i915_debug_fp.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i915/i915_debug_fp.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i915/i915_debug_fp.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i915/i915_debug_fp.c 2015-03-18 08:43:35.000000000 +0000 @@ -303,7 +303,7 @@ printf("\t\tBEGIN\n"); - assert(program[0] & 0x1ff + 2 == sz); + assert((program[0] & 0x1ff) + 2 == sz); program++; for (i = 1; i < sz; i += 3, program += 3) { diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_disasm.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_disasm.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_disasm.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_disasm.c 2015-03-18 08:43:35.000000000 +0000 @@ -597,6 +597,9 @@ } static int +format(FILE *f, const char *format, ...) PRINTFLIKE(2, 3); + +static int format(FILE *f, const char *format, ...) 
{ char buf[1024]; @@ -726,7 +729,7 @@ if (err == -1) return 0; if (brw_inst_dst_da1_subreg_nr(brw, inst)) - format(file, ".%d", brw_inst_dst_da1_subreg_nr(brw, inst) / + format(file, ".%ld", brw_inst_dst_da1_subreg_nr(brw, inst) / reg_type_size[brw_inst_dst_reg_type(brw, inst)]); string(file, "<"); err |= control(file, "horiz stride", horiz_stride, @@ -737,7 +740,7 @@ } else { string(file, "g[a0"); if (brw_inst_dst_ia_subreg_nr(brw, inst)) - format(file, ".%d", brw_inst_dst_ia_subreg_nr(brw, inst) / + format(file, ".%ld", brw_inst_dst_ia_subreg_nr(brw, inst) / reg_type_size[brw_inst_dst_reg_type(brw, inst)]); if (brw_inst_dst_ia1_addr_imm(brw, inst)) format(file, " %d", brw_inst_dst_ia1_addr_imm(brw, inst)); @@ -755,7 +758,7 @@ if (err == -1) return 0; if (brw_inst_dst_da16_subreg_nr(brw, inst)) - format(file, ".%d", brw_inst_dst_da16_subreg_nr(brw, inst) / + format(file, ".%ld", brw_inst_dst_da16_subreg_nr(brw, inst) / reg_type_size[brw_inst_dst_reg_type(brw, inst)]); string(file, "<1>"); err |= control(file, "writemask", writemask, @@ -786,7 +789,7 @@ if (err == -1) return 0; if (brw_inst_3src_dst_subreg_nr(brw, inst)) - format(file, ".%d", brw_inst_3src_dst_subreg_nr(brw, inst)); + format(file, ".%ld", brw_inst_3src_dst_subreg_nr(brw, inst)); string(file, "<1>"); err |= control(file, "writemask", writemask, brw_inst_3src_dst_writemask(brw, inst), NULL); @@ -1222,9 +1225,9 @@ string(file, "("); err |= control(file, "predicate inverse", pred_inv, brw_inst_pred_inv(brw, inst), NULL); - format(file, "f%d", brw->gen >= 7 ? brw_inst_flag_reg_nr(brw, inst) : 0); + format(file, "f%ld", brw->gen >= 7 ? 
brw_inst_flag_reg_nr(brw, inst) : 0); if (brw_inst_flag_subreg_nr(brw, inst)) - format(file, ".%d", brw_inst_flag_subreg_nr(brw, inst)); + format(file, ".%ld", brw_inst_flag_subreg_nr(brw, inst)); if (brw_inst_access_mode(brw, inst) == BRW_ALIGN_1) { err |= control(file, "predicate control align1", pred_ctrl_align1, brw_inst_pred_control(brw, inst), NULL); @@ -1258,10 +1261,10 @@ (brw->gen < 6 || (opcode != BRW_OPCODE_SEL && opcode != BRW_OPCODE_IF && opcode != BRW_OPCODE_WHILE))) { - format(file, ".f%d", + format(file, ".f%ld", brw->gen >= 7 ? brw_inst_flag_reg_nr(brw, inst) : 0); if (brw_inst_flag_subreg_nr(brw, inst)) - format(file, ".%d", brw_inst_flag_subreg_nr(brw, inst)); + format(file, ".%ld", brw_inst_flag_subreg_nr(brw, inst)); } } @@ -1273,7 +1276,7 @@ } if (opcode == BRW_OPCODE_SEND && brw->gen < 6) - format(file, " %d", brw_inst_base_mrf(brw, inst)); + format(file, " %ld", brw_inst_base_mrf(brw, inst)); if (has_uip(brw, opcode)) { /* Instructions that have UIP also have JIP. 
*/ @@ -1294,7 +1297,7 @@ pad(file, 16); format(file, "Jump: %d", brw_inst_gen4_jump_count(brw, inst)); pad(file, 32); - format(file, "Pop: %d", brw_inst_gen4_pop_count(brw, inst)); + format(file, "Pop: %ld", brw_inst_gen4_pop_count(brw, inst)); } else if (brw->gen < 6 && (opcode == BRW_OPCODE_IF || opcode == BRW_OPCODE_IFF || opcode == BRW_OPCODE_HALT)) { @@ -1302,7 +1305,7 @@ format(file, "Jump: %d", brw_inst_gen4_jump_count(brw, inst)); } else if (brw->gen < 6 && opcode == BRW_OPCODE_ENDIF) { pad(file, 16); - format(file, "Pop: %d", brw_inst_gen4_pop_count(brw, inst)); + format(file, "Pop: %ld", brw_inst_gen4_pop_count(brw, inst)); } else if (opcode == BRW_OPCODE_JMPI) { pad(file, 16); err |= src1(file, brw, inst); @@ -1371,13 +1374,13 @@ break; case BRW_SFID_SAMPLER: if (brw->gen >= 5) { - format(file, " (%d, %d, %d, %d)", + format(file, " (%ld, %ld, %ld, %ld)", brw_inst_binding_table_index(brw, inst), brw_inst_sampler(brw, inst), brw_inst_sampler_msg_type(brw, inst), brw_inst_sampler_simd_mode(brw, inst)); } else { - format(file, " (%d, %d, %d, ", + format(file, " (%ld, %ld, %ld, ", brw_inst_binding_table_index(brw, inst), brw_inst_sampler(brw, inst), brw_inst_sampler_msg_type(brw, inst)); @@ -1392,13 +1395,13 @@ case GEN6_SFID_DATAPORT_SAMPLER_CACHE: /* aka BRW_SFID_DATAPORT_READ on Gen4-5 */ if (brw->gen >= 6) { - format(file, " (%d, %d, %d, %d)", + format(file, " (%ld, %ld, %ld, %ld)", brw_inst_binding_table_index(brw, inst), brw_inst_dp_msg_control(brw, inst), brw_inst_dp_msg_type(brw, inst), brw->gen >= 7 ? 
0 : brw_inst_dp_write_commit(brw, inst)); } else { - format(file, " (%d, %d, %d)", + format(file, " (%ld, %ld, %ld)", brw_inst_binding_table_index(brw, inst), brw_inst_dp_read_msg_control(brw, inst), brw_inst_dp_read_msg_type(brw, inst)); @@ -1428,16 +1431,16 @@ if (brw->gen < 7 && brw_inst_dp_write_commit(brw, inst)) string(file, " WriteCommit"); } else { - format(file, " MsgCtrl = 0x%x", + format(file, " MsgCtrl = 0x%lx", brw_inst_dp_write_msg_control(brw, inst)); } - format(file, " Surface = %d", brw_inst_binding_table_index(brw, inst)); + format(file, " Surface = %ld", brw_inst_binding_table_index(brw, inst)); break; } case BRW_SFID_URB: - format(file, " %d", brw_inst_urb_global_offset(brw, inst)); + format(file, " %ld", brw_inst_urb_global_offset(brw, inst)); space = 1; if (brw->gen >= 7) { @@ -1470,7 +1473,7 @@ dp_dc0_msg_type_gen7, brw_inst_dp_msg_type(brw, inst), &space); - format(file, ", %d, ", brw_inst_binding_table_index(brw, inst)); + format(file, ", %ld, ", brw_inst_binding_table_index(brw, inst)); switch (brw_inst_dp_msg_type(brw, inst)) { case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP: @@ -1478,7 +1481,7 @@ brw_inst_imm_ud(brw, inst) >> 8 & 0xf, &space); break; default: - format(file, "%d", brw_inst_dp_msg_control(brw, inst)); + format(file, "%ld", brw_inst_dp_msg_control(brw, inst)); } format(file, ")"); break; @@ -1495,7 +1498,7 @@ dp_dc1_msg_type_hsw, brw_inst_dp_msg_type(brw, inst), &space); - format(file, ", Surface = %d, ", + format(file, ", Surface = %ld, ", brw_inst_binding_table_index(brw, inst)); switch (brw_inst_dp_msg_type(brw, inst)) { @@ -1529,7 +1532,7 @@ case GEN7_SFID_PIXEL_INTERPOLATOR: if (brw->gen >= 7) { - format(file, " (%s, %s, 0x%02x)", + format(file, " (%s, %s, 0x%02lx)", brw_inst_pi_nopersp(brw, inst) ? 
"linear" : "persp", pixel_interpolator_msg_types[brw_inst_pi_message_type(brw, inst)], brw_inst_pi_message_data(brw, inst)); @@ -1544,8 +1547,8 @@ if (space) string(file, " "); - format(file, "mlen %d", brw_inst_mlen(brw, inst)); - format(file, " rlen %d", brw_inst_rlen(brw, inst)); + format(file, "mlen %ld", brw_inst_mlen(brw, inst)); + format(file, " rlen %ld", brw_inst_rlen(brw, inst)); } } pad(file, 64); diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_eu_emit.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_eu_emit.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_eu_emit.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_eu_emit.c 2015-03-18 08:43:35.000000000 +0000 @@ -1332,7 +1332,7 @@ } else if (brw->gen == 7) { brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - brw_set_src1(p, insn, brw_imm_ud(0)); + brw_set_src1(p, insn, brw_imm_w(0)); brw_inst_set_jip(brw, insn, 0); brw_inst_set_uip(brw, insn, 0); } else { @@ -1533,7 +1533,7 @@ } else if (brw->gen == 7) { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_ud(0)); + brw_set_src1(p, insn, brw_imm_w(0)); brw_inst_set_jip(brw, insn, 0); brw_inst_set_uip(brw, insn, 0); } else { @@ -1610,7 +1610,7 @@ } else if (brw->gen == 7) { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_ud(0)); + brw_set_src1(p, insn, brw_imm_w(0)); } else { brw_set_src0(p, insn, brw_imm_d(0)); } @@ -1802,7 +1802,7 @@ } else if (brw->gen == 7) { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, 
insn, brw_imm_ud(0)); + brw_set_src1(p, insn, brw_imm_w(0)); brw_inst_set_jip(brw, insn, br * (do_insn - insn)); } else { brw_set_dest(p, insn, brw_imm_w(0)); diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -57,7 +57,8 @@ foreach_inst_in_block_reverse_safe(fs_inst, inst, block) { ip--; - if ((inst->opcode != BRW_OPCODE_CMP && + if ((inst->opcode != BRW_OPCODE_AND && + inst->opcode != BRW_OPCODE_CMP && inst->opcode != BRW_OPCODE_MOV) || inst->predicate != BRW_PREDICATE_NONE || !inst->dst.is_null() || @@ -65,6 +66,19 @@ inst->src[0].abs) continue; + /* Only an AND.NZ can be propagated. Many AND.Z instructions are + * generated (for ir_unop_not in fs_visitor::emit_bool_to_cond_code). + * Propagating those would require inverting the condition on the CMP. + * This changes both the flag value and the register destination of the + * CMP. That result may be used elsewhere, so we can't change its value + * on a whim. + */ + if (inst->opcode == BRW_OPCODE_AND && + !(inst->src[1].is_one() && + inst->conditional_mod == BRW_CONDITIONAL_NZ && + !inst->src[0].negate)) + continue; + if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) continue; @@ -80,6 +94,21 @@ scan_inst->dst.reg_offset != inst->src[0].reg_offset) break; + /* This must be done before the dst.type check because the result + * type of the AND will always be D, but the result of the CMP + * could be anything. The assumption is that the AND is just + * figuring out what the result of the previous comparison was + * instead of doing a new comparison with a different type. 
+ */ + if (inst->opcode == BRW_OPCODE_AND) { + if (scan_inst->opcode == BRW_OPCODE_CMP) { + inst->remove(block); + progress = true; + } + + break; + } + /* Comparisons operate differently for ints and floats */ if (scan_inst->dst.type != inst->dst.type) break; diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -2281,8 +2281,13 @@ if (inst->src[i].file != UNIFORM) continue; - int pull_index = pull_constant_loc[inst->src[i].reg + - inst->src[i].reg_offset]; + int pull_index; + unsigned location = inst->src[i].reg + inst->src[i].reg_offset; + if (location >= uniforms) /* Out of bounds access */ + pull_index = -1; + else + pull_index = pull_constant_loc[location]; + if (pull_index == -1) continue; @@ -2486,6 +2491,7 @@ inst->opcode = BRW_OPCODE_MUL; inst->src[0] = inst->src[2]; inst->src[2] = reg_undef; + progress = true; } else if (inst->src[1].is_one()) { inst->opcode = BRW_OPCODE_ADD; inst->src[1] = inst->src[2]; @@ -2516,8 +2522,16 @@ default: break; } - } + /* Swap if src[0] is immediate. 
*/ + if (progress && inst->is_commutative()) { + if (inst->src[0].file == IMM) { + fs_reg tmp = inst->src[1]; + inst->src[1] = inst->src[0]; + inst->src[0] = tmp; + } + } + } return progress; } @@ -3835,12 +3849,17 @@ if (INTEL_DEBUG & DEBUG_SHADER_TIME) emit_shader_time_begin(); - foreach_in_list(ir_instruction, ir, shader->base.ir) { - base_ir = ir; - this->result = reg_undef; - ir->accept(this); + if (getenv("INTEL_USE_NIR") != NULL) { + emit_nir_code(); + } else { + foreach_in_list(ir_instruction, ir, shader->base.ir) { + base_ir = ir; + this->result = reg_undef; + ir->accept(this); + } + base_ir = NULL; } - base_ir = NULL; + if (failed) return false; diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs_cse.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs_cse.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs_cse.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs_cse.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -109,28 +109,6 @@ } static bool -is_expression_commutative(const fs_inst *inst) -{ - switch (inst->opcode) { - case BRW_OPCODE_AND: - case BRW_OPCODE_OR: - case BRW_OPCODE_XOR: - case BRW_OPCODE_ADD: - case BRW_OPCODE_MUL: - return true; - case BRW_OPCODE_SEL: - /* MIN and MAX are commutative. 
*/ - if (inst->conditional_mod == BRW_CONDITIONAL_GE || - inst->conditional_mod == BRW_CONDITIONAL_L) { - return true; - } - /* fallthrough */ - default: - return false; - } -} - -static bool operands_match(const fs_inst *a, const fs_inst *b) { fs_reg *xs = a->src; @@ -140,7 +118,7 @@ return xs[0].equals(ys[0]) && ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) || (xs[2].equals(ys[1]) && xs[1].equals(ys[2]))); - } else if (!is_expression_commutative(a)) { + } else if (!a->is_commutative()) { bool match = true; for (int i = 0; i < a->sources; i++) { if (!xs[i].equals(ys[i])) { diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs.h 2015-03-18 08:43:35.000000000 +0000 @@ -417,6 +417,8 @@ void visit_atomic_counter_intrinsic(ir_call *ir); const void *const key; + const struct brw_sampler_prog_key_data *key_tex; + struct brw_stage_prog_data *prog_data; unsigned int sanity_param_count; diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs_nir.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs_nir.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -102,10 +102,10 @@ nir_lower_var_copies(nir); nir_validate_shader(nir); - nir_lower_io(nir); - nir_validate_shader(nir); + /* Get rid of split copies */ + nir_optimize(nir); - nir_lower_locals_to_regs(nir); + nir_lower_io(nir); nir_validate_shader(nir); nir_remove_dead_variables(nir); @@ -122,6 +122,9 @@ nir_optimize(nir); + nir_lower_locals_to_regs(nir); + nir_validate_shader(nir); + nir_lower_to_source_mods(nir); nir_validate_shader(nir); 
nir_copy_prop(nir); @@ -196,18 +199,48 @@ struct hash_entry *entry; hash_table_foreach(shader->inputs, entry) { nir_variable *var = (nir_variable *) entry->data; - fs_reg varying = offset(nir_inputs, var->data.driver_location); + enum brw_reg_type type = brw_type_for_base_type(var->type); + fs_reg input = offset(nir_inputs, var->data.driver_location); fs_reg reg; - if (var->data.location == VARYING_SLOT_POS) { - reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer, - var->data.origin_upper_left); - emit_percomp(MOV(varying, reg), 0xF); - } else { - emit_general_interpolation(varying, var->name, var->type, - (glsl_interp_qualifier) var->data.interpolation, - var->data.location, var->data.centroid, - var->data.sample); + switch (stage) { + case MESA_SHADER_VERTEX: { + /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value + * stored in nir_variable::location. + * + * However, NIR's load_input intrinsics use a different index - an + * offset into a single contiguous array containing all inputs. + * This index corresponds to the nir_variable::driver_location field. + * + * So, we need to copy from fs_reg(ATTR, var->location) to + * offset(nir_inputs, var->data.driver_location). + */ + unsigned components = var->type->without_array()->components(); + unsigned array_length = var->type->is_array() ? 
var->type->length : 1; + for (unsigned i = 0; i < array_length; i++) { + for (unsigned j = 0; j < components; j++) { + emit(MOV(retype(offset(input, components * i + j), type), + offset(fs_reg(ATTR, var->data.location + i, type), j))); + } + } + break; + } + case MESA_SHADER_GEOMETRY: + case MESA_SHADER_COMPUTE: + unreachable("fs_visitor not used for these stages yet."); + break; + case MESA_SHADER_FRAGMENT: + if (var->data.location == VARYING_SLOT_POS) { + reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer, + var->data.origin_upper_left); + emit_percomp(MOV(input, reg), 0xF); + } else { + emit_general_interpolation(input, var->name, var->type, + (glsl_interp_qualifier) var->data.interpolation, + var->data.location, var->data.centroid, + var->data.sample); + } + break; } } } @@ -222,7 +255,17 @@ nir_variable *var = (nir_variable *) entry->data; fs_reg reg = offset(nir_outputs, var->data.driver_location); - if (var->data.index > 0) { + int vector_elements = + var->type->is_array() ? var->type->fields.array->vector_elements + : var->type->vector_elements; + + if (stage == MESA_SHADER_VERTEX) { + for (int i = 0; i < ALIGN(type_size(var->type), 4) / 4; i++) { + int output = var->data.location + i; + this->outputs[output] = offset(reg, 4 * i); + this->output_components[output] = vector_elements; + } + } else if (var->data.index > 0) { assert(var->data.location == FRAG_RESULT_DATA0); assert(var->data.index == 1); this->dual_src_output = reg; @@ -242,10 +285,6 @@ assert(var->data.location >= FRAG_RESULT_DATA0 && var->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS); - int vector_elements = - var->type->is_array() ? var->type->fields.array->vector_elements - : var->type->vector_elements; - /* General color output. 
*/ for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) { int output = var->data.location - FRAG_RESULT_DATA0 + i; @@ -360,6 +399,30 @@ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); switch (intrin->intrinsic) { + case nir_intrinsic_load_vertex_id: + unreachable("should be lowered by lower_vertex_id()."); + + case nir_intrinsic_load_vertex_id_zero_base: + assert(v->stage == MESA_SHADER_VERTEX); + reg = &v->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]; + if (reg->file == BAD_FILE) + *reg = *v->emit_vs_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); + break; + + case nir_intrinsic_load_base_vertex: + assert(v->stage == MESA_SHADER_VERTEX); + reg = &v->nir_system_values[SYSTEM_VALUE_BASE_VERTEX]; + if (reg->file == BAD_FILE) + *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_VERTEX); + break; + + case nir_intrinsic_load_instance_id: + assert(v->stage == MESA_SHADER_VERTEX); + reg = &v->nir_system_values[SYSTEM_VALUE_INSTANCE_ID]; + if (reg->file == BAD_FILE) + *reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID); + break; + case nir_intrinsic_load_sample_pos: assert(v->stage == MESA_SHADER_FRAGMENT); reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_POS]; @@ -1341,6 +1404,33 @@ *emit_frontfacing_interpolation())); break; + case nir_intrinsic_load_vertex_id: + unreachable("should be lowered by lower_vertex_id()"); + + case nir_intrinsic_load_vertex_id_zero_base: { + fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]; + assert(vertex_id.file != BAD_FILE); + dest.type = vertex_id.type; + emit(MOV(dest, vertex_id)); + break; + } + + case nir_intrinsic_load_base_vertex: { + fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX]; + assert(base_vertex.file != BAD_FILE); + dest.type = base_vertex.type; + emit(MOV(dest, base_vertex)); + break; + } + + case nir_intrinsic_load_instance_id: { + fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID]; + assert(instance_id.file != BAD_FILE); + dest.type = 
instance_id.type; + emit(MOV(dest, instance_id)); + break; + } + case nir_intrinsic_load_sample_mask_in: { fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN]; assert(sample_mask_in.file != BAD_FILE); @@ -1612,7 +1702,6 @@ void fs_visitor::nir_emit_texture(nir_tex_instr *instr) { - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; unsigned sampler = instr->sampler_index; fs_reg sampler_reg(sampler); @@ -1709,10 +1798,12 @@ } if (instr->op == nir_texop_txf_ms) { - if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<gen >= 7 && + key_tex->compressed_multisample_layout_mask & (1 << sampler)) { mcs = emit_mcs_fetch(coordinate, instr->coord_components, sampler_reg); - else + } else { mcs = fs_reg(0u); + } } for (unsigned i = 0; i < 3; i++) { diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -1860,19 +1860,6 @@ return inst; } -static struct brw_sampler_prog_key_data * -get_tex(gl_shader_stage stage, const void *key) -{ - switch (stage) { - case MESA_SHADER_FRAGMENT: - return &((brw_wm_prog_key*) key)->tex; - case MESA_SHADER_VERTEX: - return &((brw_vue_prog_key*) key)->tex; - default: - unreachable("unhandled shader stage"); - } -} - fs_reg fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, bool is_rect, uint32_t sampler, int texunit) @@ -1880,7 +1867,6 @@ fs_inst *inst = NULL; bool needs_gl_clamp = true; fs_reg scale_x, scale_y; - struct brw_sampler_prog_key_data *tex = get_tex(stage, this->key); /* The 965 requires the EU to do the normalization of GL rectangle * texture coordinates. 
We use the program parameter state @@ -1888,8 +1874,8 @@ */ if (is_rect && (brw->gen < 6 || - (brw->gen >= 6 && (tex->gl_clamp_mask[0] & (1 << sampler) || - tex->gl_clamp_mask[1] & (1 << sampler))))) { + (brw->gen >= 6 && (key_tex->gl_clamp_mask[0] & (1 << sampler) || + key_tex->gl_clamp_mask[1] & (1 << sampler))))) { struct gl_program_parameter_list *params = prog->Parameters; int tokens[STATE_LENGTH] = { STATE_INTERNAL, @@ -1950,7 +1936,7 @@ needs_gl_clamp = false; for (int i = 0; i < 2; i++) { - if (tex->gl_clamp_mask[i] & (1 << sampler)) { + if (key_tex->gl_clamp_mask[i] & (1 << sampler)) { fs_reg chan = coordinate; chan = offset(chan, i); @@ -1975,7 +1961,7 @@ if (coord_components > 0 && needs_gl_clamp) { for (int i = 0; i < MIN2(coord_components, 3); i++) { - if (tex->gl_clamp_mask[i] & (1 << sampler)) { + if (key_tex->gl_clamp_mask[i] & (1 << sampler)) { fs_reg chan = coordinate; chan = offset(chan, i); @@ -2033,14 +2019,13 @@ uint32_t sampler, fs_reg sampler_reg, int texunit) { - struct brw_sampler_prog_key_data *tex = get_tex(stage, this->key); fs_inst *inst = NULL; if (op == ir_tg4) { /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother * emitting anything other than setting up the constant result. 
*/ - int swiz = GET_SWZ(tex->swizzles[sampler], gather_component); + int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component); if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { fs_reg res = vgrf(glsl_type::vec4_type); @@ -2094,7 +2079,7 @@ gather_channel(gather_component, sampler) << 16; /* M0.2:16-17 */ if (brw->gen == 6) - emit_gen6_gather_wa(tex->gen6_gather_wa[sampler], dst); + emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], dst); } /* fixup #layers for cube map arrays */ @@ -2121,7 +2106,6 @@ void fs_visitor::visit(ir_texture *ir) { - const struct brw_sampler_prog_key_data *tex = get_tex(stage, this->key); uint32_t sampler = _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog); @@ -2238,11 +2222,13 @@ ir->lod_info.sample_index->accept(this); sample_index = this->result; - if (brw->gen >= 7 && tex->compressed_multisample_layout_mask & (1<<sampler)) + if (brw->gen >= 7 && + key_tex->compressed_multisample_layout_mask & (1 << sampler)) { mcs = emit_mcs_fetch(coordinate, ir->coordinate->type->vector_elements, sampler_reg); - else + } else { mcs = fs_reg(0u); + } break; default: unreachable("Unrecognized texture opcode"); @@ -2302,15 +2288,14 @@ uint32_t fs_visitor::gather_channel(int orig_chan, uint32_t sampler) { - struct brw_sampler_prog_key_data *tex = get_tex(stage, this->key); - int swiz = GET_SWZ(tex->swizzles[sampler], orig_chan); + int swiz = GET_SWZ(key_tex->swizzles[sampler], orig_chan); switch (swiz) { case SWIZZLE_X: return 0; case SWIZZLE_Y: /* gather4 sampler is broken for green channel on RG32F -- * we must ask for blue instead. */ - if (tex->gather_channel_quirk_mask & (1<<sampler)) + if (key_tex->gather_channel_quirk_mask & (1 << sampler)) return 2; return 1; case SWIZZLE_Z: return 2; @@ -2342,16 +2327,14 @@ if (op == ir_txs || op == ir_lod || op == ir_tg4) return; - struct brw_sampler_prog_key_data *tex = get_tex(stage, this->key); - if (dest_components == 1) { /* Ignore DEPTH_TEXTURE_MODE swizzling. 
*/ - } else if (tex->swizzles[sampler] != SWIZZLE_NOOP) { + } else if (key_tex->swizzles[sampler] != SWIZZLE_NOOP) { fs_reg swizzled_result = vgrf(glsl_type::vec4_type); swizzled_result.type = orig_val.type; for (int i = 0; i < 4; i++) { - int swiz = GET_SWZ(tex->swizzles[sampler], i); + int swiz = GET_SWZ(key_tex->swizzles[sampler], i); fs_reg l = swizzled_result; l = offset(l, i); @@ -2361,7 +2344,7 @@ emit(MOV(l, fs_reg(1.0f))); } else { emit(MOV(l, offset(orig_val, - GET_SWZ(tex->swizzles[sampler], i)))); + GET_SWZ(key_tex->swizzles[sampler], i)))); } } this->result = swizzled_result; @@ -2752,11 +2735,11 @@ if (!then_rhs || !else_rhs) return false; - if ((then_rhs->is_one() || then_rhs->is_negative_one()) && - (else_rhs->is_one() || else_rhs->is_negative_one())) { - assert(then_rhs->is_one() == else_rhs->is_negative_one()); - assert(else_rhs->is_one() == then_rhs->is_negative_one()); + if (then_rhs->type->base_type != GLSL_TYPE_FLOAT) + return false; + if ((then_rhs->is_one() && else_rhs->is_negative_one()) || + (else_rhs->is_one() && then_rhs->is_negative_one())) { then_assign->lhs->accept(this); fs_reg dst = this->result; dst.type = BRW_REGISTER_TYPE_D; @@ -4034,6 +4017,18 @@ void fs_visitor::init() { + switch (stage) { + case MESA_SHADER_FRAGMENT: + key_tex = &((const brw_wm_prog_key *) key)->tex; + break; + case MESA_SHADER_VERTEX: + case MESA_SHADER_GEOMETRY: + key_tex = &((const brw_vue_prog_key *) key)->tex; + break; + default: + unreachable("unhandled shader stage"); + } + this->failed = false; this->simd16_unsupported = false; this->no16_msg = NULL; diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_shader.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_shader.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_shader.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_shader.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -769,6 +769,28 @@ } 
bool +backend_instruction::is_commutative() const +{ + switch (opcode) { + case BRW_OPCODE_AND: + case BRW_OPCODE_OR: + case BRW_OPCODE_XOR: + case BRW_OPCODE_ADD: + case BRW_OPCODE_MUL: + return true; + case BRW_OPCODE_SEL: + /* MIN and MAX are commutative. */ + if (conditional_mod == BRW_CONDITIONAL_GE || + conditional_mod == BRW_CONDITIONAL_L) { + return true; + } + /* fallthrough */ + default: + return false; + } +} + +bool backend_instruction::is_3src() const { return opcode < ARRAY_SIZE(opcode_descs) && opcode_descs[opcode].nsrc == 3; diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_shader.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_shader.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_shader.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_shader.h 2015-03-18 08:43:35.000000000 +0000 @@ -94,6 +94,7 @@ bool is_tex() const; bool is_math() const; bool is_control_flow() const; + bool is_commutative() const; bool can_do_source_mods() const; bool can_do_saturate() const; bool can_do_cmod() const; diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -89,28 +89,6 @@ } static bool -is_expression_commutative(const vec4_instruction *inst) -{ - switch (inst->opcode) { - case BRW_OPCODE_AND: - case BRW_OPCODE_OR: - case BRW_OPCODE_XOR: - case BRW_OPCODE_ADD: - case BRW_OPCODE_MUL: - return true; - case BRW_OPCODE_SEL: - /* MIN and MAX are commutative. 
*/ - if (inst->conditional_mod == BRW_CONDITIONAL_GE || - inst->conditional_mod == BRW_CONDITIONAL_L) { - return true; - } - /* fallthrough */ - default: - return false; - } -} - -static bool operands_match(const vec4_instruction *a, const vec4_instruction *b) { const src_reg *xs = a->src; @@ -120,7 +98,7 @@ return xs[0].equals(ys[0]) && ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) || (xs[2].equals(ys[1]) && xs[1].equals(ys[2]))); - } else if (!is_expression_commutative(a)) { + } else if (!a->is_commutative()) { return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]); } else { return (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) || diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -1049,18 +1049,38 @@ { assert(surf_index.type == BRW_REGISTER_TYPE_UD); + struct brw_reg src = offset; + bool header_present = false; + int mlen = 1; + + if (brw->gen >= 9) { + /* Skylake requires a message header in order to use SIMD4x2 mode. 
*/ + src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD); + mlen = 2; + header_present = true; + + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, src, retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD)); + brw_set_default_access_mode(p, BRW_ALIGN_1); + + brw_MOV(p, get_element_ud(src, 2), + brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2)); + brw_pop_insn_state(p); + } + if (surf_index.file == BRW_IMMEDIATE_VALUE) { brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, offset); + brw_set_src0(p, insn, src); brw_set_sampler_message(p, insn, surf_index.dw1.ud, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ - 1, /* mlen */ - false, /* no header */ + mlen, + header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); @@ -1089,8 +1109,8 @@ 0 /* sampler */, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1 /* rlen */, - 1 /* mlen */, - false /* header */, + mlen /* mlen */, + header_present /* header */, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1); @@ -1102,7 +1122,7 @@ /* dst = send(offset, a0.0) */ brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn_send, dst); - brw_set_src0(p, insn_send, offset); + brw_set_src0(p, insn_send, src); brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr); brw_pop_insn_state(p); diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -1555,6 +1555,11 @@ } case ir_binop_all_equal: + if (brw->gen <= 5) { + resolve_bool_comparison(ir->operands[0], &op[0]); + 
resolve_bool_comparison(ir->operands[1], &op[1]); + } + /* "==" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { @@ -1567,6 +1572,11 @@ } break; case ir_binop_any_nequal: + if (brw->gen <= 5) { + resolve_bool_comparison(ir->operands[0], &op[0]); + resolve_bool_comparison(ir->operands[1], &op[1]); + } + /* "!=" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { @@ -1581,6 +1591,9 @@ break; case ir_unop_any: + if (brw->gen <= 5) { + resolve_bool_comparison(ir->operands[0], &op[0]); + } emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); emit(MOV(result_dst, src_reg(0))); @@ -1770,6 +1783,15 @@ if (brw->gen >= 7) { dst_reg grf_offset = dst_reg(this, glsl_type::int_type); + + /* We have to use a message header on Skylake to get SIMD4x2 mode. + * Reserve space for the register. + */ + if (brw->gen >= 9) { + grf_offset.reg_offset++; + alloc.sizes[grf_offset.reg] = 2; + } + grf_offset.type = offset.type; emit(MOV(grf_offset, offset)); @@ -3464,6 +3486,15 @@ if (brw->gen >= 7) { dst_reg grf_offset = dst_reg(this, glsl_type::int_type); + + /* We have to use a message header on Skylake to get SIMD4x2 mode. + * Reserve space for the register. 
+ */ + if (brw->gen >= 9) { + grf_offset.reg_offset++; + alloc.sizes[grf_offset.reg] = 2; + } + grf_offset.type = offset.type; emit_before(block, inst, MOV(grf_offset, offset)); diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -527,6 +527,15 @@ /* Add the small constant index to the address register */ src_reg reladdr = src_reg(this, glsl_type::int_type); + + /* We have to use a message header on Skylake to get SIMD4x2 mode. + * Reserve space for the register. + */ + if (brw->gen >= 9) { + reladdr.reg_offset++; + alloc.sizes[reladdr.reg] = 2; + } + dst_reg dst_reladdr = dst_reg(reladdr); dst_reladdr.writemask = WRITEMASK_X; emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index))); diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/intel_tex_validate.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/intel_tex_validate.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/intel_tex_validate.c 2014-09-10 05:44:12.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/intel_tex_validate.c 2015-03-18 08:43:35.000000000 +0000 @@ -98,10 +98,17 @@ return true; } - /* Immutable textures should not get this far -- they should have been - * created in a validated state, and nothing can invalidate them. + /* On recent generations, immutable textures should not get this far + * -- they should have been created in a validated state, and nothing + * can invalidate them. + * + * Unfortunately, this is not true on pre-Sandybridge hardware -- when + * rendering into an immutable-format depth texture we may have to rebase + * the rendered levels to meet alignment requirements. 
+ * + * FINISHME: Avoid doing this. */ - assert(!tObj->Immutable); + assert(!tObj->Immutable || brw->gen < 6); firstImage = intel_texture_image(tObj->Image[0][tObj->BaseLevel]); diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 2015-03-18 08:43:35.000000000 +0000 @@ -398,6 +398,8 @@ return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, src_pitch, swizzle_bit, rgba8_copy_aligned_dst); + else + unreachable("not reached"); } else { if (mem_copy == memcpy) return linear_to_xtiled(x0, x1, x2, x3, y0, y1, @@ -406,6 +408,8 @@ return linear_to_xtiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, rgba8_copy_aligned_dst); + else + unreachable("not reached"); } linear_to_xtiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, mem_copy); @@ -436,6 +440,8 @@ return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, src_pitch, swizzle_bit, rgba8_copy_aligned_dst); + else + unreachable("not reached"); } else { if (mem_copy == memcpy) return linear_to_ytiled(x0, x1, x2, x3, y0, y1, @@ -444,6 +450,8 @@ return linear_to_ytiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, rgba8_copy_aligned_dst); + else + unreachable("not reached"); } linear_to_ytiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, mem_copy); @@ -474,6 +482,8 @@ return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, dst_pitch, swizzle_bit, rgba8_copy_aligned_src); + else + unreachable("not reached"); } else { if (mem_copy == memcpy) return xtiled_to_linear(x0, x1, x2, x3, y0, y1, @@ -482,6 +492,8 @@ return xtiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, 
swizzle_bit, rgba8_copy_aligned_src); + else + unreachable("not reached"); } xtiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, mem_copy); @@ -512,6 +524,8 @@ return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, dst_pitch, swizzle_bit, rgba8_copy_aligned_src); + else + unreachable("not reached"); } else { if (mem_copy == memcpy) return ytiled_to_linear(x0, x1, x2, x3, y0, y1, @@ -520,6 +534,8 @@ return ytiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, rgba8_copy_aligned_src); + else + unreachable("not reached"); } ytiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, mem_copy); diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -449,3 +449,108 @@ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); } + +TEST_F(cmod_propagation_test, andnz_one) +{ + fs_reg dest = v->vgrf(glsl_type::int_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg zero(0.0f); + fs_reg one(1); + + v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) + ->conditional_mod = BRW_CONDITIONAL_L; + v->emit(BRW_OPCODE_AND, v->reg_null_f, dest, one) + ->conditional_mod = BRW_CONDITIONAL_NZ; + + /* = Before = + * 0: cmp.l.f0(8) dest:F src0:F 0F + * 1: and.nz.f0(8) null:D dest:D 1D + * + * = After = + * 0: cmp.l.f0(8) dest:F src0:F 0F + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + 
EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_TRUE(retype(dest, BRW_REGISTER_TYPE_F) + .equals(instruction(block0, 0)->dst)); +} + +TEST_F(cmod_propagation_test, andnz_non_one) +{ + fs_reg dest = v->vgrf(glsl_type::int_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg zero(0.0f); + fs_reg nonone(38); + + v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) + ->conditional_mod = BRW_CONDITIONAL_L; + v->emit(BRW_OPCODE_AND, v->reg_null_f, dest, nonone) + ->conditional_mod = BRW_CONDITIONAL_NZ; + + /* = Before = + * 0: cmp.l.f0(8) dest:F src0:F 0F + * 1: and.nz.f0(8) null:D dest:D 38D + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, andz_one) +{ + fs_reg dest = v->vgrf(glsl_type::int_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg zero(0.0f); + fs_reg one(1); + + v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero) + ->conditional_mod = BRW_CONDITIONAL_L; + v->emit(BRW_OPCODE_AND, v->reg_null_f, dest, one) + ->conditional_mod = BRW_CONDITIONAL_Z; + + /* = Before = + * 0: cmp.l.f0(8) dest:F src0:F 0F + * 1: and.z.f0(8) null:D dest:D 1D + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + 
EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod); +} diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/x11/fakeglx.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/x11/fakeglx.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/drivers/x11/fakeglx.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/drivers/x11/fakeglx.c 2015-03-18 08:43:35.000000000 +0000 @@ -40,6 +40,7 @@ */ +#include #include #include "glxheader.h" #include "glxapi.h" @@ -846,7 +847,7 @@ ext = dpy->ext_procs; /* new extension is at head of list */ assert(c->extension == ext->codes.extension); (void) c; /* silence warning */ - ext->name = _mesa_strdup(extName); + ext->name = strdup(extName); ext->close_display = close_display_callback; } } diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/bufferobj.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/bufferobj.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/bufferobj.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/bufferobj.c 2015-03-18 08:43:35.000000000 +0000 @@ -232,67 +232,62 @@ * \c glClearBufferSubData. * * \param ctx GL context. - * \param target Buffer object target on which to operate. + * \param bufObj The buffer object. * \param offset Offset of the first byte of the subdata range. * \param size Size, in bytes, of the subdata range. * \param mappedRange If true, checks if an overlapping range is mapped. * If false, checks if buffer is mapped. - * \param errorNoBuffer Error code if no buffer is bound to target. * \param caller Name of calling function for recording errors. 
- * \return A pointer to the buffer object bound to \c target in the - * specified context or \c NULL if any of the parameter or state - * conditions are invalid. + * \return false if error, true otherwise * * \sa glBufferSubDataARB, glGetBufferSubDataARB, glClearBufferSubData */ -static struct gl_buffer_object * -buffer_object_subdata_range_good(struct gl_context * ctx, GLenum target, - GLintptrARB offset, GLsizeiptrARB size, - bool mappedRange, GLenum errorNoBuffer, - const char *caller) +static bool +buffer_object_subdata_range_good(struct gl_context *ctx, + struct gl_buffer_object *bufObj, + GLintptr offset, GLsizeiptr size, + bool mappedRange, const char *caller) { - struct gl_buffer_object *bufObj; - if (size < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(size < 0)", caller); - return NULL; + return false; } if (offset < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(offset < 0)", caller); - return NULL; + return false; } - bufObj = get_buffer(ctx, caller, target, errorNoBuffer); - if (!bufObj) - return NULL; - if (offset + size > bufObj->Size) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(offset %lu + size %lu > buffer size %lu)", caller, (unsigned long) offset, (unsigned long) size, (unsigned long) bufObj->Size); - return NULL; + return false; } if (bufObj->Mappings[MAP_USER].AccessFlags & GL_MAP_PERSISTENT_BIT) - return bufObj; + return true; if (mappedRange) { if (bufferobj_range_mapped(bufObj, offset, size)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "%s", caller); - return NULL; + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(range is mapped without persistent bit)", + caller); + return false; } } else { if (_mesa_bufferobj_mapped(bufObj, MAP_USER)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "%s", caller); - return NULL; + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(buffer is mapped without persistent bit)", + caller); + return false; } } - return bufObj; + return true; } @@ -561,9 +556,9 @@ * \sa glBufferDataARB, dd_function_table::BufferData. 
*/ static GLboolean -_mesa_buffer_data( struct gl_context *ctx, GLenum target, GLsizeiptrARB size, - const GLvoid * data, GLenum usage, GLenum storageFlags, - struct gl_buffer_object * bufObj ) +buffer_data_fallback(struct gl_context *ctx, GLenum target, GLsizeiptr size, + const GLvoid *data, GLenum usage, GLenum storageFlags, + struct gl_buffer_object *bufObj) { void * new_data; @@ -607,9 +602,9 @@ * \sa glBufferSubDataARB, dd_function_table::BufferSubData. */ static void -_mesa_buffer_subdata( struct gl_context *ctx, GLintptrARB offset, - GLsizeiptrARB size, const GLvoid * data, - struct gl_buffer_object * bufObj ) +buffer_sub_data_fallback(struct gl_context *ctx, GLintptr offset, + GLsizeiptr size, const GLvoid *data, + struct gl_buffer_object *bufObj) { (void) ctx; @@ -670,11 +665,11 @@ * dd_function_table::ClearBufferSubData. */ void -_mesa_buffer_clear_subdata(struct gl_context *ctx, - GLintptr offset, GLsizeiptr size, - const GLvoid *clearValue, - GLsizeiptr clearValueSize, - struct gl_buffer_object *bufObj) +_mesa_ClearBufferSubData_sw(struct gl_context *ctx, + GLintptr offset, GLsizeiptr size, + const GLvoid *clearValue, + GLsizeiptr clearValueSize, + struct gl_buffer_object *bufObj) { GLsizeiptr i; GLubyte *dest; @@ -711,10 +706,10 @@ * Called via glMapBufferRange(). */ static void * -_mesa_buffer_map_range( struct gl_context *ctx, GLintptr offset, - GLsizeiptr length, GLbitfield access, - struct gl_buffer_object *bufObj, - gl_map_buffer_index index) +map_buffer_range_fallback(struct gl_context *ctx, GLintptr offset, + GLsizeiptr length, GLbitfield access, + struct gl_buffer_object *bufObj, + gl_map_buffer_index index) { (void) ctx; assert(!_mesa_bufferobj_mapped(bufObj, index)); @@ -732,10 +727,10 @@ * Called via glFlushMappedBufferRange(). 
*/ static void -_mesa_buffer_flush_mapped_range( struct gl_context *ctx, - GLintptr offset, GLsizeiptr length, - struct gl_buffer_object *obj, - gl_map_buffer_index index) +flush_mapped_buffer_range_fallback(struct gl_context *ctx, + GLintptr offset, GLsizeiptr length, + struct gl_buffer_object *obj, + gl_map_buffer_index index) { (void) ctx; (void) offset; @@ -746,15 +741,15 @@ /** - * Default callback for \c dd_function_table::MapBuffer(). + * Default callback for \c dd_function_table::UnmapBuffer(). * * The input parameters will have been already tested for errors. * * \sa glUnmapBufferARB, dd_function_table::UnmapBuffer */ static GLboolean -_mesa_buffer_unmap(struct gl_context *ctx, struct gl_buffer_object *bufObj, - gl_map_buffer_index index) +unmap_buffer_fallback(struct gl_context *ctx, struct gl_buffer_object *bufObj, + gl_map_buffer_index index) { (void) ctx; /* XXX we might assert here that bufObj->Pointer is non-null */ @@ -771,11 +766,11 @@ * Called via glCopyBufferSubData(). 
*/ static void -_mesa_copy_buffer_subdata(struct gl_context *ctx, - struct gl_buffer_object *src, - struct gl_buffer_object *dst, - GLintptr readOffset, GLintptr writeOffset, - GLsizeiptr size) +copy_buffer_sub_data_fallback(struct gl_context *ctx, + struct gl_buffer_object *src, + struct gl_buffer_object *dst, + GLintptr readOffset, GLintptr writeOffset, + GLsizeiptr size) { GLubyte *srcPtr, *dstPtr; @@ -1117,20 +1112,20 @@ /* GL_ARB_vertex/pixel_buffer_object */ driver->NewBufferObject = _mesa_new_buffer_object; driver->DeleteBuffer = _mesa_delete_buffer_object; - driver->BufferData = _mesa_buffer_data; - driver->BufferSubData = _mesa_buffer_subdata; + driver->BufferData = buffer_data_fallback; + driver->BufferSubData = buffer_sub_data_fallback; driver->GetBufferSubData = _mesa_buffer_get_subdata; - driver->UnmapBuffer = _mesa_buffer_unmap; + driver->UnmapBuffer = unmap_buffer_fallback; /* GL_ARB_clear_buffer_object */ - driver->ClearBufferSubData = _mesa_buffer_clear_subdata; + driver->ClearBufferSubData = _mesa_ClearBufferSubData_sw; /* GL_ARB_map_buffer_range */ - driver->MapBufferRange = _mesa_buffer_map_range; - driver->FlushMappedBufferRange = _mesa_buffer_flush_mapped_range; + driver->MapBufferRange = map_buffer_range_fallback; + driver->FlushMappedBufferRange = flush_mapped_buffer_range_fallback; /* GL_ARB_copy_buffer */ - driver->CopyBufferSubData = _mesa_copy_buffer_subdata; + driver->CopyBufferSubData = copy_buffer_sub_data_fallback; } @@ -1292,27 +1287,29 @@ /** - * Generate a set of unique buffer object IDs and store them in \c buffer. - * - * \param n Number of IDs to generate. - * \param buffer Array of \c n locations to store the IDs. + * This is the implementation for glGenBuffers and glCreateBuffers. It is not + * exposed to the rest of Mesa to encourage the use of nameless buffers in + * driver internals. 
*/ -void GLAPIENTRY -_mesa_GenBuffers(GLsizei n, GLuint *buffer) +static void +create_buffers(GLsizei n, GLuint *buffers, bool dsa) { GET_CURRENT_CONTEXT(ctx); GLuint first; GLint i; + struct gl_buffer_object *buf; + + const char *func = dsa ? "glCreateBuffers" : "glGenBuffers"; if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glGenBuffers(%d)\n", n); + _mesa_debug(ctx, "%s(%d)\n", func, n); if (n < 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGenBuffersARB"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(n %d < 0)", func, n); return; } - if (!buffer) { + if (!buffers) { return; } @@ -1323,16 +1320,53 @@ first = _mesa_HashFindFreeKeyBlock(ctx->Shared->BufferObjects, n); - /* Insert the ID and pointer to dummy buffer object into hash table */ + /* Insert the ID and pointer into the hash table. If non-DSA, insert a + * DummyBufferObject. Otherwise, create a new buffer object and insert + * it. + */ for (i = 0; i < n; i++) { - _mesa_HashInsert(ctx->Shared->BufferObjects, first + i, - &DummyBufferObject); - buffer[i] = first + i; + buffers[i] = first + i; + if (dsa) { + assert(ctx->Driver.NewBufferObject); + buf = ctx->Driver.NewBufferObject(ctx, buffers[i]); + if (!buf) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func); + return; + } + } + else + buf = &DummyBufferObject; + + _mesa_HashInsert(ctx->Shared->BufferObjects, buffers[i], buf); } mtx_unlock(&ctx->Shared->Mutex); } +/** + * Generate a set of unique buffer object IDs and store them in \c buffers. + * + * \param n Number of IDs to generate. + * \param buffers Array of \c n locations to store the IDs. + */ +void GLAPIENTRY +_mesa_GenBuffers(GLsizei n, GLuint *buffers) +{ + create_buffers(n, buffers, false); +} + +/** + * Create a set of buffer objects and store their unique IDs in \c buffers. + * + * \param n Number of IDs to generate. + * \param buffers Array of \c n locations to store the IDs. 
+ */ +void GLAPIENTRY +_mesa_CreateBuffers(GLsizei n, GLuint *buffers) +{ + create_buffers(n, buffers, true); +} + /** * Determine if ID is the name of a buffer object. @@ -1356,15 +1390,13 @@ } -void GLAPIENTRY -_mesa_BufferStorage(GLenum target, GLsizeiptr size, const GLvoid *data, - GLbitfield flags) +void +_mesa_buffer_storage(struct gl_context *ctx, struct gl_buffer_object *bufObj, + GLenum target, GLsizeiptr size, const GLvoid *data, + GLbitfield flags, const char *func) { - GET_CURRENT_CONTEXT(ctx); - struct gl_buffer_object *bufObj; - if (size <= 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glBufferStorage(size <= 0)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(size <= 0)", func); return; } @@ -1374,27 +1406,25 @@ GL_MAP_COHERENT_BIT | GL_DYNAMIC_STORAGE_BIT | GL_CLIENT_STORAGE_BIT)) { - _mesa_error(ctx, GL_INVALID_VALUE, "glBufferStorage(flags)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(invalid flag bits set)", func); return; } if (flags & GL_MAP_PERSISTENT_BIT && !(flags & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT))) { - _mesa_error(ctx, GL_INVALID_VALUE, "glBufferStorage(flags!=READ/WRITE)"); + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(PERSISTENT and flags!=READ/WRITE)", func); return; } if (flags & GL_MAP_COHERENT_BIT && !(flags & GL_MAP_PERSISTENT_BIT)) { - _mesa_error(ctx, GL_INVALID_VALUE, "glBufferStorage(flags!=PERSISTENT)"); + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(COHERENT and flags!=PERSISTENT)", func); return; } - bufObj = get_buffer(ctx, "glBufferStorage", target, GL_INVALID_OPERATION); - if (!bufObj) - return; - if (bufObj->Immutable) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBufferStorage(immutable)"); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(immutable)", func); return; } @@ -1414,31 +1444,65 @@ * glBufferStorage is not described in the spec, Graham Sellers * said that it should behave the same as glBufferData. 
*/ - _mesa_error(ctx, GL_INVALID_OPERATION, "glBufferStorage()"); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", func); } else { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBufferStorage()"); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func); } } } +void GLAPIENTRY +_mesa_BufferStorage(GLenum target, GLsizeiptr size, const GLvoid *data, + GLbitfield flags) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; + + bufObj = get_buffer(ctx, "glBufferStorage", target, GL_INVALID_OPERATION); + if (!bufObj) + return; + + _mesa_buffer_storage(ctx, bufObj, target, size, data, flags, + "glBufferStorage"); +} void GLAPIENTRY -_mesa_BufferData(GLenum target, GLsizeiptrARB size, - const GLvoid * data, GLenum usage) +_mesa_NamedBufferStorage(GLuint buffer, GLsizeiptr size, const GLvoid *data, + GLbitfield flags) { GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glNamedBufferStorage"); + if (!bufObj) + return; + + /* + * In direct state access, buffer objects have an unspecified target since + * they are not required to be bound. 
+ */ + _mesa_buffer_storage(ctx, bufObj, GL_NONE, size, data, flags, + "glNamedBufferStorage"); +} + + +void +_mesa_buffer_data(struct gl_context *ctx, struct gl_buffer_object *bufObj, + GLenum target, GLsizeiptr size, const GLvoid *data, + GLenum usage, const char *func) +{ bool valid_usage; if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glBufferData(%s, %ld, %p, %s)\n", + _mesa_debug(ctx, "%s(%s, %ld, %p, %s)\n", + func, _mesa_lookup_enum_by_nr(target), (long int) size, data, _mesa_lookup_enum_by_nr(usage)); if (size < 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glBufferDataARB(size < 0)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(size < 0)", func); return; } @@ -1467,16 +1531,13 @@ } if (!valid_usage) { - _mesa_error(ctx, GL_INVALID_ENUM, "glBufferData(usage)"); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid usage: %s)", func, + _mesa_lookup_enum_by_nr(usage)); return; } - bufObj = get_buffer(ctx, "glBufferDataARB", target, GL_INVALID_OPERATION); - if (!bufObj) - return; - if (bufObj->Immutable) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBufferData(immutable)"); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(immutable)", func); return; } @@ -1509,33 +1570,73 @@ * EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, and the store cannot be * mapped to the GPU address space. 
*/ - _mesa_error(ctx, GL_INVALID_OPERATION, "glBufferData()"); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", func); } else { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBufferData()"); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func); } } } +void GLAPIENTRY +_mesa_BufferData(GLenum target, GLsizeiptr size, + const GLvoid *data, GLenum usage) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; + + bufObj = get_buffer(ctx, "glBufferData", target, GL_INVALID_OPERATION); + if (!bufObj) + return; + + _mesa_buffer_data(ctx, bufObj, target, size, data, usage, + "glBufferData"); +} void GLAPIENTRY -_mesa_BufferSubData(GLenum target, GLintptrARB offset, - GLsizeiptrARB size, const GLvoid * data) +_mesa_NamedBufferData(GLuint buffer, GLsizeiptr size, const GLvoid *data, + GLenum usage) { GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - bufObj = buffer_object_subdata_range_good( ctx, target, offset, size, - false, GL_INVALID_OPERATION, - "glBufferSubDataARB" ); - if (!bufObj) { + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glNamedBufferData"); + if (!bufObj) + return; + + /* In direct state access, buffer objects have an unspecified target since + * they are not required to be bound. + */ + _mesa_buffer_data(ctx, bufObj, GL_NONE, size, data, usage, + "glNamedBufferData"); +} + + +/** + * Implementation for glBufferSubData and glNamedBufferSubData. + * + * \param ctx GL context. + * \param bufObj The buffer object. + * \param offset Offset of the first byte of the subdata range. + * \param size Size, in bytes, of the subdata range. + * \param data The data store. + * \param func Name of calling function for recording errors. 
+ * + */ +void +_mesa_buffer_sub_data(struct gl_context *ctx, struct gl_buffer_object *bufObj, + GLintptr offset, GLsizeiptr size, const GLvoid *data, + const char *func) +{ + if (!buffer_object_subdata_range_good(ctx, bufObj, offset, size, + false, func)) { /* error already recorded */ return; } if (bufObj->Immutable && !(bufObj->StorageFlags & GL_DYNAMIC_STORAGE_BIT)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBufferSubData"); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", func); return; } @@ -1545,105 +1646,107 @@ bufObj->Written = GL_TRUE; assert(ctx->Driver.BufferSubData); - ctx->Driver.BufferSubData( ctx, offset, size, data, bufObj ); + ctx->Driver.BufferSubData(ctx, offset, size, data, bufObj); } +void GLAPIENTRY +_mesa_BufferSubData(GLenum target, GLintptr offset, + GLsizeiptr size, const GLvoid *data) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; + + bufObj = get_buffer(ctx, "glBufferSubData", target, GL_INVALID_OPERATION); + if (!bufObj) + return; + + _mesa_buffer_sub_data(ctx, bufObj, offset, size, data, "glBufferSubData"); +} void GLAPIENTRY -_mesa_GetBufferSubData(GLenum target, GLintptrARB offset, - GLsizeiptrARB size, void * data) +_mesa_NamedBufferSubData(GLuint buffer, GLintptr offset, + GLsizeiptr size, const GLvoid *data) { GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - bufObj = buffer_object_subdata_range_good(ctx, target, offset, size, - false, GL_INVALID_OPERATION, - "glGetBufferSubDataARB"); - if (!bufObj) { - /* error already recorded */ + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glNamedBufferSubData"); + if (!bufObj) return; - } - assert(ctx->Driver.GetBufferSubData); - ctx->Driver.GetBufferSubData( ctx, offset, size, data, bufObj ); + _mesa_buffer_sub_data(ctx, bufObj, offset, size, data, + "glNamedBufferSubData"); } void GLAPIENTRY -_mesa_ClearBufferData(GLenum target, GLenum internalformat, GLenum format, - GLenum type, const GLvoid* data) +_mesa_GetBufferSubData(GLenum target, GLintptr 
offset, + GLsizeiptr size, GLvoid *data) { GET_CURRENT_CONTEXT(ctx); - struct gl_buffer_object* bufObj; - mesa_format mesaFormat; - GLubyte clearValue[MAX_PIXEL_BYTES]; - GLsizeiptr clearValueSize; + struct gl_buffer_object *bufObj; - bufObj = get_buffer(ctx, "glClearBufferData", target, GL_INVALID_VALUE); - if (!bufObj) { + bufObj = get_buffer(ctx, "glGetBufferSubData", target, + GL_INVALID_OPERATION); + if (!bufObj) return; - } - if (_mesa_check_disallowed_mapping(bufObj)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glClearBufferData(buffer currently mapped)"); + if (!buffer_object_subdata_range_good(ctx, bufObj, offset, size, false, + "glGetBufferSubData")) { return; } - mesaFormat = validate_clear_buffer_format(ctx, internalformat, - format, type, - "glClearBufferData"); - if (mesaFormat == MESA_FORMAT_NONE) { - return; - } + assert(ctx->Driver.GetBufferSubData); + ctx->Driver.GetBufferSubData(ctx, offset, size, data, bufObj); +} - clearValueSize = _mesa_get_format_bytes(mesaFormat); - if (bufObj->Size % clearValueSize != 0) { - _mesa_error(ctx, GL_INVALID_VALUE, - "glClearBufferData(size is not a multiple of " - "internalformat size)"); - return; - } +void GLAPIENTRY +_mesa_GetNamedBufferSubData(GLuint buffer, GLintptr offset, + GLsizeiptr size, GLvoid *data) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; - if (data == NULL) { - /* clear to zeros, per the spec */ - ctx->Driver.ClearBufferSubData(ctx, 0, bufObj->Size, - NULL, clearValueSize, bufObj); + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, + "glGetNamedBufferSubData"); + if (!bufObj) return; - } - if (!convert_clear_buffer_data(ctx, mesaFormat, clearValue, - format, type, data, "glClearBufferData")) { + if (!buffer_object_subdata_range_good(ctx, bufObj, offset, size, false, + "glGetNamedBufferSubData")) { return; } - ctx->Driver.ClearBufferSubData(ctx, 0, bufObj->Size, - clearValue, clearValueSize, bufObj); + assert(ctx->Driver.GetBufferSubData); + 
ctx->Driver.GetBufferSubData(ctx, offset, size, data, bufObj); } -void GLAPIENTRY -_mesa_ClearBufferSubData(GLenum target, GLenum internalformat, - GLintptr offset, GLsizeiptr size, - GLenum format, GLenum type, - const GLvoid* data) +/** + * \param subdata true if caller is *SubData, false if *Data + */ +void +_mesa_clear_buffer_sub_data(struct gl_context *ctx, + struct gl_buffer_object *bufObj, + GLenum internalformat, + GLintptr offset, GLsizeiptr size, + GLenum format, GLenum type, + const GLvoid *data, + const char *func, bool subdata) { - GET_CURRENT_CONTEXT(ctx); - struct gl_buffer_object* bufObj; mesa_format mesaFormat; GLubyte clearValue[MAX_PIXEL_BYTES]; GLsizeiptr clearValueSize; - bufObj = buffer_object_subdata_range_good(ctx, target, offset, size, - true, GL_INVALID_VALUE, - "glClearBufferSubData"); - if (!bufObj) { + /* This checks for disallowed mappings. */ + if (!buffer_object_subdata_range_good(ctx, bufObj, offset, size, + subdata, func)) { return; } mesaFormat = validate_clear_buffer_format(ctx, internalformat, - format, type, - "glClearBufferSubData"); + format, type, func); + if (mesaFormat == MESA_FORMAT_NONE) { return; } @@ -1651,8 +1754,8 @@ clearValueSize = _mesa_get_format_bytes(mesaFormat); if (offset % clearValueSize != 0 || size % clearValueSize != 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glClearBufferSubData(offset or size is not a multiple of " - "internalformat size)"); + "%s(offset or size is not a multiple of " + "internalformat size)", func); return; } @@ -1666,8 +1769,7 @@ } if (!convert_clear_buffer_data(ctx, mesaFormat, clearValue, - format, type, data, - "glClearBufferSubData")) { + format, type, data, func)) { return; } @@ -1677,131 +1779,87 @@ } } - -void * GLAPIENTRY -_mesa_MapBuffer(GLenum target, GLenum access) +void GLAPIENTRY +_mesa_ClearBufferData(GLenum target, GLenum internalformat, GLenum format, + GLenum type, const GLvoid *data) { GET_CURRENT_CONTEXT(ctx); - struct gl_buffer_object * bufObj; - GLbitfield 
accessFlags; - void *map; - bool valid_access; + struct gl_buffer_object *bufObj; - ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, NULL); + bufObj = get_buffer(ctx, "glClearBufferData", target, GL_INVALID_VALUE); + if (!bufObj) + return; - switch (access) { - case GL_READ_ONLY_ARB: - accessFlags = GL_MAP_READ_BIT; - valid_access = _mesa_is_desktop_gl(ctx); - break; - case GL_WRITE_ONLY_ARB: - accessFlags = GL_MAP_WRITE_BIT; - valid_access = true; - break; - case GL_READ_WRITE_ARB: - accessFlags = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT; - valid_access = _mesa_is_desktop_gl(ctx); - break; - default: - valid_access = false; - break; - } + _mesa_clear_buffer_sub_data(ctx, bufObj, internalformat, 0, bufObj->Size, + format, type, data, + "glClearBufferData", false); +} - if (!valid_access) { - _mesa_error(ctx, GL_INVALID_ENUM, "glMapBufferARB(access)"); - return NULL; - } +void GLAPIENTRY +_mesa_ClearNamedBufferData(GLuint buffer, GLenum internalformat, + GLenum format, GLenum type, const GLvoid *data) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; - bufObj = get_buffer(ctx, "glMapBufferARB", target, GL_INVALID_OPERATION); + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glClearNamedBufferData"); if (!bufObj) - return NULL; - - if (accessFlags & GL_MAP_READ_BIT && - !(bufObj->StorageFlags & GL_MAP_READ_BIT)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBuffer(invalid read flag)"); - return NULL; - } + return; - if (accessFlags & GL_MAP_WRITE_BIT && - !(bufObj->StorageFlags & GL_MAP_WRITE_BIT)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBuffer(invalid write flag)"); - return NULL; - } + _mesa_clear_buffer_sub_data(ctx, bufObj, internalformat, 0, bufObj->Size, + format, type, data, + "glClearNamedBufferData", false); +} - if (_mesa_bufferobj_mapped(bufObj, MAP_USER)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glMapBufferARB(already mapped)"); - return NULL; - } - if (!bufObj->Size) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, - "glMapBuffer(buffer 
size = 0)"); - return NULL; - } +void GLAPIENTRY +_mesa_ClearBufferSubData(GLenum target, GLenum internalformat, + GLintptr offset, GLsizeiptr size, + GLenum format, GLenum type, + const GLvoid *data) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; - assert(ctx->Driver.MapBufferRange); - map = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size, accessFlags, bufObj, - MAP_USER); - if (!map) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)"); - return NULL; - } - else { - /* The driver callback should have set these fields. - * This is important because other modules (like VBO) might call - * the driver function directly. - */ - assert(bufObj->Mappings[MAP_USER].Pointer == map); - assert(bufObj->Mappings[MAP_USER].Length == bufObj->Size); - assert(bufObj->Mappings[MAP_USER].Offset == 0); - bufObj->Mappings[MAP_USER].AccessFlags = accessFlags; - } + bufObj = get_buffer(ctx, "glClearBufferSubData", target, GL_INVALID_VALUE); + if (!bufObj) + return; - if (access == GL_WRITE_ONLY_ARB || access == GL_READ_WRITE_ARB) - bufObj->Written = GL_TRUE; + _mesa_clear_buffer_sub_data(ctx, bufObj, internalformat, offset, size, + format, type, data, + "glClearBufferSubData", true); +} -#ifdef VBO_DEBUG - printf("glMapBufferARB(%u, sz %ld, access 0x%x)\n", - bufObj->Name, bufObj->Size, access); - if (access == GL_WRITE_ONLY_ARB) { - GLuint i; - GLubyte *b = (GLubyte *) bufObj->Pointer; - for (i = 0; i < bufObj->Size; i++) - b[i] = i & 0xff; - } -#endif +void GLAPIENTRY +_mesa_ClearNamedBufferSubData(GLuint buffer, GLenum internalformat, + GLintptr offset, GLsizeiptr size, + GLenum format, GLenum type, + const GLvoid *data) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; -#ifdef BOUNDS_CHECK - { - GLubyte *buf = (GLubyte *) bufObj->Pointer; - GLuint i; - /* buffer is 100 bytes larger than requested, fill with magic value */ - for (i = 0; i < 100; i++) { - buf[bufObj->Size - i - 1] = 123; - } - } -#endif + bufObj = 
_mesa_lookup_bufferobj_err(ctx, buffer, + "glClearNamedBufferSubData"); + if (!bufObj) + return; - return bufObj->Mappings[MAP_USER].Pointer; + _mesa_clear_buffer_sub_data(ctx, bufObj, internalformat, offset, size, + format, type, data, + "glClearNamedBufferSubData", true); } -GLboolean GLAPIENTRY -_mesa_UnmapBuffer(GLenum target) +GLboolean +_mesa_unmap_buffer(struct gl_context *ctx, struct gl_buffer_object *bufObj, + const char *func) { - GET_CURRENT_CONTEXT(ctx); - struct gl_buffer_object *bufObj; GLboolean status = GL_TRUE; ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE); - bufObj = get_buffer(ctx, "glUnmapBufferARB", target, GL_INVALID_OPERATION); - if (!bufObj) - return GL_FALSE; - if (!_mesa_bufferobj_mapped(bufObj, MAP_USER)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glUnmapBufferARB"); + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(buffer is not mapped)", func); return GL_FALSE; } @@ -1850,129 +1908,164 @@ return status; } +GLboolean GLAPIENTRY +_mesa_UnmapBuffer(GLenum target) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; -void GLAPIENTRY -_mesa_GetBufferParameteriv(GLenum target, GLenum pname, GLint *params) + bufObj = get_buffer(ctx, "glUnmapBuffer", target, GL_INVALID_OPERATION); + if (!bufObj) + return GL_FALSE; + + return _mesa_unmap_buffer(ctx, bufObj, "glUnmapBuffer"); +} + +GLboolean GLAPIENTRY +_mesa_UnmapNamedBuffer(GLuint buffer) { GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; - bufObj = get_buffer(ctx, "glGetBufferParameterivARB", target, - GL_INVALID_OPERATION); + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glUnmapNamedBuffer"); if (!bufObj) - return; + return GL_FALSE; + + return _mesa_unmap_buffer(ctx, bufObj, "glUnmapNamedBuffer"); +} + +static bool +get_buffer_parameter(struct gl_context *ctx, + struct gl_buffer_object *bufObj, GLenum pname, + GLint64 *params, const char *func) +{ switch (pname) { case GL_BUFFER_SIZE_ARB: - *params = (GLint) bufObj->Size; - return; + *params = bufObj->Size; + 
break; case GL_BUFFER_USAGE_ARB: *params = bufObj->Usage; - return; + break; case GL_BUFFER_ACCESS_ARB: *params = simplified_access_mode(ctx, bufObj->Mappings[MAP_USER].AccessFlags); - return; + break; case GL_BUFFER_MAPPED_ARB: *params = _mesa_bufferobj_mapped(bufObj, MAP_USER); - return; + break; case GL_BUFFER_ACCESS_FLAGS: if (!ctx->Extensions.ARB_map_buffer_range) goto invalid_pname; *params = bufObj->Mappings[MAP_USER].AccessFlags; - return; + break; case GL_BUFFER_MAP_OFFSET: if (!ctx->Extensions.ARB_map_buffer_range) goto invalid_pname; - *params = (GLint) bufObj->Mappings[MAP_USER].Offset; - return; + *params = bufObj->Mappings[MAP_USER].Offset; + break; case GL_BUFFER_MAP_LENGTH: if (!ctx->Extensions.ARB_map_buffer_range) goto invalid_pname; - *params = (GLint) bufObj->Mappings[MAP_USER].Length; - return; + *params = bufObj->Mappings[MAP_USER].Length; + break; case GL_BUFFER_IMMUTABLE_STORAGE: if (!ctx->Extensions.ARB_buffer_storage) goto invalid_pname; *params = bufObj->Immutable; - return; + break; case GL_BUFFER_STORAGE_FLAGS: if (!ctx->Extensions.ARB_buffer_storage) goto invalid_pname; *params = bufObj->StorageFlags; - return; + break; default: - ; /* fall-through */ + goto invalid_pname; } + return true; + invalid_pname: - _mesa_error(ctx, GL_INVALID_ENUM, "glGetBufferParameterivARB(pname=%s)", + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname: %s)", func, _mesa_lookup_enum_by_nr(pname)); + return false; } +void GLAPIENTRY +_mesa_GetBufferParameteriv(GLenum target, GLenum pname, GLint *params) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; + GLint64 parameter; + + bufObj = get_buffer(ctx, "glGetBufferParameteriv", target, + GL_INVALID_OPERATION); + if (!bufObj) + return; + + if (!get_buffer_parameter(ctx, bufObj, pname, &parameter, + "glGetBufferParameteriv")) + return; /* Error already recorded. 
*/ + + *params = (GLint) parameter; +} -/** - * New in GL 3.2 - * This is pretty much a duplicate of GetBufferParameteriv() but the - * GL_BUFFER_SIZE_ARB attribute will be 64-bits on a 64-bit system. - */ void GLAPIENTRY _mesa_GetBufferParameteri64v(GLenum target, GLenum pname, GLint64 *params) { GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + GLint64 parameter; bufObj = get_buffer(ctx, "glGetBufferParameteri64v", target, GL_INVALID_OPERATION); if (!bufObj) return; - switch (pname) { - case GL_BUFFER_SIZE_ARB: - *params = bufObj->Size; - return; - case GL_BUFFER_USAGE_ARB: - *params = bufObj->Usage; - return; - case GL_BUFFER_ACCESS_ARB: - *params = simplified_access_mode(ctx, - bufObj->Mappings[MAP_USER].AccessFlags); - return; - case GL_BUFFER_ACCESS_FLAGS: - if (!ctx->Extensions.ARB_map_buffer_range) - goto invalid_pname; - *params = bufObj->Mappings[MAP_USER].AccessFlags; - return; - case GL_BUFFER_MAPPED_ARB: - *params = _mesa_bufferobj_mapped(bufObj, MAP_USER); - return; - case GL_BUFFER_MAP_OFFSET: - if (!ctx->Extensions.ARB_map_buffer_range) - goto invalid_pname; - *params = bufObj->Mappings[MAP_USER].Offset; - return; - case GL_BUFFER_MAP_LENGTH: - if (!ctx->Extensions.ARB_map_buffer_range) - goto invalid_pname; - *params = bufObj->Mappings[MAP_USER].Length; - return; - case GL_BUFFER_IMMUTABLE_STORAGE: - if (!ctx->Extensions.ARB_buffer_storage) - goto invalid_pname; - *params = bufObj->Immutable; + if (!get_buffer_parameter(ctx, bufObj, pname, &parameter, + "glGetBufferParameteri64v")) + return; /* Error already recorded. 
*/ + + *params = parameter; +} + +void GLAPIENTRY +_mesa_GetNamedBufferParameteriv(GLuint buffer, GLenum pname, GLint *params) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; + GLint64 parameter; + + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, + "glGetNamedBufferParameteriv"); + if (!bufObj) return; - case GL_BUFFER_STORAGE_FLAGS: - if (!ctx->Extensions.ARB_buffer_storage) - goto invalid_pname; - *params = bufObj->StorageFlags; + + if (!get_buffer_parameter(ctx, bufObj, pname, &parameter, + "glGetNamedBufferParameteriv")) + return; /* Error already recorded. */ + + *params = (GLint) parameter; +} + +void GLAPIENTRY +_mesa_GetNamedBufferParameteri64v(GLuint buffer, GLenum pname, + GLint64 *params) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; + GLint64 parameter; + + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, + "glGetNamedBufferParameteri64v"); + if (!bufObj) return; - default: - ; /* fall-through */ - } -invalid_pname: - _mesa_error(ctx, GL_INVALID_ENUM, "glGetBufferParameteri64v(pname=%s)", - _mesa_lookup_enum_by_nr(pname)); + if (!get_buffer_parameter(ctx, bufObj, pname, &parameter, + "glGetNamedBufferParameteri64v")) + return; /* Error already recorded. 
*/ + + *params = parameter; } @@ -1980,14 +2073,15 @@ _mesa_GetBufferPointerv(GLenum target, GLenum pname, GLvoid **params) { GET_CURRENT_CONTEXT(ctx); - struct gl_buffer_object * bufObj; + struct gl_buffer_object *bufObj; - if (pname != GL_BUFFER_MAP_POINTER_ARB) { - _mesa_error(ctx, GL_INVALID_ENUM, "glGetBufferPointervARB(pname)"); + if (pname != GL_BUFFER_MAP_POINTER) { + _mesa_error(ctx, GL_INVALID_ENUM, "glGetBufferPointerv(pname != " + "GL_BUFFER_MAP_POINTER)"); return; } - bufObj = get_buffer(ctx, "glGetBufferPointervARB", target, + bufObj = get_buffer(ctx, "glGetBufferPointerv", target, GL_INVALID_OPERATION); if (!bufObj) return; @@ -1995,66 +2089,75 @@ *params = bufObj->Mappings[MAP_USER].Pointer; } - void GLAPIENTRY -_mesa_CopyBufferSubData(GLenum readTarget, GLenum writeTarget, - GLintptr readOffset, GLintptr writeOffset, - GLsizeiptr size) +_mesa_GetNamedBufferPointerv(GLuint buffer, GLenum pname, GLvoid **params) { GET_CURRENT_CONTEXT(ctx); - struct gl_buffer_object *src, *dst; + struct gl_buffer_object *bufObj; - src = get_buffer(ctx, "glCopyBufferSubData", readTarget, - GL_INVALID_OPERATION); - if (!src) + if (pname != GL_BUFFER_MAP_POINTER) { + _mesa_error(ctx, GL_INVALID_ENUM, "glGetNamedBufferPointerv(pname != " + "GL_BUFFER_MAP_POINTER)"); return; + } - dst = get_buffer(ctx, "glCopyBufferSubData", writeTarget, - GL_INVALID_OPERATION); - if (!dst) + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, + "glGetNamedBufferPointerv"); + if (!bufObj) return; + *params = bufObj->Mappings[MAP_USER].Pointer; +} + + +void +_mesa_copy_buffer_sub_data(struct gl_context *ctx, + struct gl_buffer_object *src, + struct gl_buffer_object *dst, + GLintptr readOffset, GLintptr writeOffset, + GLsizeiptr size, const char *func) +{ if (_mesa_check_disallowed_mapping(src)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glCopyBufferSubData(readBuffer is mapped)"); + "%s(readBuffer is mapped)", func); return; } if (_mesa_check_disallowed_mapping(dst)) { _mesa_error(ctx, 
GL_INVALID_OPERATION, - "glCopyBufferSubData(writeBuffer is mapped)"); + "%s(writeBuffer is mapped)", func); return; } if (readOffset < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glCopyBufferSubData(readOffset = %d)", (int) readOffset); + "%s(readOffset %d < 0)", func, (int) readOffset); return; } if (writeOffset < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glCopyBufferSubData(writeOffset = %d)", (int) writeOffset); + "%s(writeOffset %d < 0)", func, (int) writeOffset); return; } if (size < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glCopyBufferSubData(writeOffset = %d)", (int) size); + "%s(size %d < 0)", func, (int) size); return; } if (readOffset + size > src->Size) { _mesa_error(ctx, GL_INVALID_VALUE, - "glCopyBufferSubData(readOffset + size = %d)", - (int) (readOffset + size)); + "%s(readOffset %d + size %d > src_buffer_size %d)", func, + (int) readOffset, (int) size, (int) src->Size); return; } if (writeOffset + size > dst->Size) { _mesa_error(ctx, GL_INVALID_VALUE, - "glCopyBufferSubData(writeOffset + size = %d)", - (int) (writeOffset + size)); + "%s(writeOffset %d + size %d > dst_buffer_size %d)", func, + (int) writeOffset, (int) size, (int) dst->Size); return; } @@ -2068,7 +2171,7 @@ else { /* overlapping src/dst is illegal */ _mesa_error(ctx, GL_INVALID_VALUE, - "glCopyBufferSubData(overlapping src/dst)"); + "%s(overlapping src/dst)", func); return; } } @@ -2076,36 +2179,71 @@ ctx->Driver.CopyBufferSubData(ctx, src, dst, readOffset, writeOffset, size); } +void GLAPIENTRY +_mesa_CopyBufferSubData(GLenum readTarget, GLenum writeTarget, + GLintptr readOffset, GLintptr writeOffset, + GLsizeiptr size) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *src, *dst; -/** - * See GL_ARB_map_buffer_range spec - */ -void * GLAPIENTRY -_mesa_MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length, - GLbitfield access) + src = get_buffer(ctx, "glCopyBufferSubData", readTarget, + GL_INVALID_OPERATION); + if (!src) + return; + + dst = get_buffer(ctx, 
"glCopyBufferSubData", writeTarget, + GL_INVALID_OPERATION); + if (!dst) + return; + + _mesa_copy_buffer_sub_data(ctx, src, dst, readOffset, writeOffset, size, + "glCopyBufferSubData"); +} + +void GLAPIENTRY +_mesa_CopyNamedBufferSubData(GLuint readBuffer, GLuint writeBuffer, + GLintptr readOffset, GLintptr writeOffset, + GLsizeiptr size) { GET_CURRENT_CONTEXT(ctx); - struct gl_buffer_object *bufObj; + struct gl_buffer_object *src, *dst; + + src = _mesa_lookup_bufferobj_err(ctx, readBuffer, + "glCopyNamedBufferSubData"); + if (!src) + return; + + dst = _mesa_lookup_bufferobj_err(ctx, writeBuffer, + "glCopyNamedBufferSubData"); + if (!dst) + return; + + _mesa_copy_buffer_sub_data(ctx, src, dst, readOffset, writeOffset, size, + "glCopyNamedBufferSubData"); +} + + +void * +_mesa_map_buffer_range(struct gl_context *ctx, + struct gl_buffer_object *bufObj, + GLintptr offset, GLsizeiptr length, + GLbitfield access, const char *func) +{ void *map; GLbitfield allowed_access; ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, NULL); - if (!ctx->Extensions.ARB_map_buffer_range) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBufferRange(extension not supported)"); - return NULL; - } - if (offset < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glMapBufferRange(offset = %ld)", (long)offset); + "%s(offset %ld < 0)", func, (long) offset); return NULL; } if (length < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glMapBufferRange(length = %ld)", (long)length); + "%s(length %ld < 0)", func, (long) length); return NULL; } @@ -2115,10 +2253,13 @@ * conditions: * * * is zero." + * + * Additionally, page 94 of the PDF of the OpenGL 4.5 core spec + * (30.10.2014) also says this, so it's no longer allowed for desktop GL, + * either. 
*/ - if (_mesa_is_gles(ctx) && length == 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBufferRange(length = 0)"); + if (length == 0) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(length = 0)", func); return NULL; } @@ -2135,14 +2276,15 @@ } if (access & ~allowed_access) { - /* generate an error if any other than allowed bit is set */ - _mesa_error(ctx, GL_INVALID_VALUE, "glMapBufferRange(access)"); + /* generate an error if any bits other than those allowed are set */ + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(access has undefined bits set)", func); return NULL; } if ((access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == 0) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBufferRange(access indicates neither read or write)"); + "%s(access indicates neither read or write)", func); return NULL; } @@ -2151,82 +2293,69 @@ GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT))) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBufferRange(invalid access flags)"); + "%s(read access with disallowed bits)", func); return NULL; } if ((access & GL_MAP_FLUSH_EXPLICIT_BIT) && ((access & GL_MAP_WRITE_BIT) == 0)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBufferRange(invalid access flags)"); + "%s(access has flush explicit without write)", func); return NULL; } - bufObj = get_buffer(ctx, "glMapBufferRange", target, GL_INVALID_OPERATION); - if (!bufObj) - return NULL; - if (access & GL_MAP_READ_BIT && !(bufObj->StorageFlags & GL_MAP_READ_BIT)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBufferRange(invalid read flag)"); + "%s(buffer does not allow read access)", func); return NULL; } if (access & GL_MAP_WRITE_BIT && !(bufObj->StorageFlags & GL_MAP_WRITE_BIT)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBufferRange(invalid write flag)"); + "%s(buffer does not allow write access)", func); return NULL; } if (access & GL_MAP_COHERENT_BIT && !(bufObj->StorageFlags & GL_MAP_COHERENT_BIT)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBufferRange(invalid 
coherent flag)"); + "%s(buffer does not allow coherent access)", func); return NULL; } if (access & GL_MAP_PERSISTENT_BIT && !(bufObj->StorageFlags & GL_MAP_PERSISTENT_BIT)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBufferRange(invalid persistent flag)"); + "%s(buffer does not allow persistent access)", func); return NULL; } if (offset + length > bufObj->Size) { _mesa_error(ctx, GL_INVALID_VALUE, - "glMapBufferRange(offset + length > size)"); + "%s(offset %ld + length %ld > buffer_size %ld)", func, + offset, length, bufObj->Size); return NULL; } if (_mesa_bufferobj_mapped(bufObj, MAP_USER)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glMapBufferRange(buffer already mapped)"); + "%s(buffer already mapped)", func); return NULL; } if (!bufObj->Size) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, - "glMapBufferRange(buffer size = 0)"); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s(buffer size = 0)", func); return NULL; } - /* Mapping zero bytes should return a non-null pointer. */ - if (!length) { - static long dummy = 0; - bufObj->Mappings[MAP_USER].Pointer = &dummy; - bufObj->Mappings[MAP_USER].Length = length; - bufObj->Mappings[MAP_USER].Offset = offset; - bufObj->Mappings[MAP_USER].AccessFlags = access; - return bufObj->Mappings[MAP_USER].Pointer; - } assert(ctx->Driver.MapBufferRange); map = ctx->Driver.MapBufferRange(ctx, offset, length, access, bufObj, MAP_USER); if (!map) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)"); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s(map failed)", func); } else { /* The driver callback should have set all these fields. 
@@ -2239,61 +2368,189 @@ assert(bufObj->Mappings[MAP_USER].AccessFlags == access); } + if (access & GL_MAP_WRITE_BIT) + bufObj->Written = GL_TRUE; + +#ifdef VBO_DEBUG + if (strstr(func, "Range") == NULL) { /* If not MapRange */ + printf("glMapBuffer(%u, sz %ld, access 0x%x)\n", + bufObj->Name, bufObj->Size, access); + /* Access must be write only */ + if ((access & GL_MAP_WRITE_BIT) && (!(access & ~GL_MAP_WRITE_BIT))) { + GLuint i; + GLubyte *b = (GLubyte *) bufObj->Pointer; + for (i = 0; i < bufObj->Size; i++) + b[i] = i & 0xff; + } + } +#endif + +#ifdef BOUNDS_CHECK + if (strstr(func, "Range") == NULL) { /* If not MapRange */ + GLubyte *buf = (GLubyte *) bufObj->Pointer; + GLuint i; + /* buffer is 100 bytes larger than requested, fill with magic value */ + for (i = 0; i < 100; i++) { + buf[bufObj->Size - i - 1] = 123; + } + } +#endif + return map; } +void * GLAPIENTRY +_mesa_MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length, + GLbitfield access) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; + + if (!ctx->Extensions.ARB_map_buffer_range) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glMapBufferRange(ARB_map_buffer_range not supported)"); + return NULL; + } + + bufObj = get_buffer(ctx, "glMapBufferRange", target, GL_INVALID_OPERATION); + if (!bufObj) + return NULL; + + return _mesa_map_buffer_range(ctx, bufObj, offset, length, access, + "glMapBufferRange"); +} + +void * GLAPIENTRY +_mesa_MapNamedBufferRange(GLuint buffer, GLintptr offset, GLsizeiptr length, + GLbitfield access) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; + + if (!ctx->Extensions.ARB_map_buffer_range) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glMapNamedBufferRange(" + "ARB_map_buffer_range not supported)"); + return NULL; + } + + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glMapNamedBufferRange"); + if (!bufObj) + return NULL; + + return _mesa_map_buffer_range(ctx, bufObj, offset, length, access, + "glMapNamedBufferRange"); +} /** - 
* See GL_ARB_map_buffer_range spec + * Converts GLenum access from MapBuffer and MapNamedBuffer into + * flags for input to _mesa_map_buffer_range. + * + * \return true if the type of requested access is permissible. */ -void GLAPIENTRY -_mesa_FlushMappedBufferRange(GLenum target, GLintptr offset, GLsizeiptr length) +static bool +get_map_buffer_access_flags(struct gl_context *ctx, GLenum access, + GLbitfield *flags) +{ + switch (access) { + case GL_READ_ONLY_ARB: + *flags = GL_MAP_READ_BIT; + return _mesa_is_desktop_gl(ctx); + case GL_WRITE_ONLY_ARB: + *flags = GL_MAP_WRITE_BIT; + return true; + case GL_READ_WRITE_ARB: + *flags = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT; + return _mesa_is_desktop_gl(ctx); + default: + return false; + } +} + +void * GLAPIENTRY +_mesa_MapBuffer(GLenum target, GLenum access) { GET_CURRENT_CONTEXT(ctx); struct gl_buffer_object *bufObj; + GLbitfield accessFlags; + + if (!get_map_buffer_access_flags(ctx, access, &accessFlags)) { + _mesa_error(ctx, GL_INVALID_ENUM, "glMapBuffer(invalid access)"); + return NULL; + } + + bufObj = get_buffer(ctx, "glMapBuffer", target, GL_INVALID_OPERATION); + if (!bufObj) + return NULL; + + return _mesa_map_buffer_range(ctx, bufObj, 0, bufObj->Size, accessFlags, + "glMapBuffer"); +} + +void * GLAPIENTRY +_mesa_MapNamedBuffer(GLuint buffer, GLenum access) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; + GLbitfield accessFlags; + if (!get_map_buffer_access_flags(ctx, access, &accessFlags)) { + _mesa_error(ctx, GL_INVALID_ENUM, "glMapNamedBuffer(invalid access)"); + return NULL; + } + + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, "glMapNamedBuffer"); + if (!bufObj) + return NULL; + + return _mesa_map_buffer_range(ctx, bufObj, 0, bufObj->Size, accessFlags, + "glMapNamedBuffer"); +} + + +void +_mesa_flush_mapped_buffer_range(struct gl_context *ctx, + struct gl_buffer_object *bufObj, + GLintptr offset, GLsizeiptr length, + const char *func) +{ if (!ctx->Extensions.ARB_map_buffer_range) { 
_mesa_error(ctx, GL_INVALID_OPERATION, - "glFlushMappedBufferRange(extension not supported)"); + "%s(ARB_map_buffer_range not supported)", func); return; } if (offset < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glFlushMappedBufferRange(offset = %ld)", (long)offset); + "%s(offset %ld < 0)", func, (long) offset); return; } if (length < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glFlushMappedBufferRange(length = %ld)", (long)length); + "%s(length %ld < 0)", func, (long) length); return; } - bufObj = get_buffer(ctx, "glFlushMappedBufferRange", target, - GL_INVALID_OPERATION); - if (!bufObj) - return; - if (!_mesa_bufferobj_mapped(bufObj, MAP_USER)) { /* buffer is not mapped */ _mesa_error(ctx, GL_INVALID_OPERATION, - "glFlushMappedBufferRange(buffer is not mapped)"); + "%s(buffer is not mapped)", func); return; } if ((bufObj->Mappings[MAP_USER].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT) == 0) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glFlushMappedBufferRange(GL_MAP_FLUSH_EXPLICIT_BIT not set)"); + "%s(GL_MAP_FLUSH_EXPLICIT_BIT not set)", func); return; } if (offset + length > bufObj->Mappings[MAP_USER].Length) { _mesa_error(ctx, GL_INVALID_VALUE, - "glFlushMappedBufferRange(offset %ld + length %ld > mapped length %ld)", - (long)offset, (long)length, - (long)bufObj->Mappings[MAP_USER].Length); + "%s(offset %ld + length %ld > mapped length %ld)", func, + (long) offset, (long) length, + (long) bufObj->Mappings[MAP_USER].Length); return; } @@ -2304,6 +2561,38 @@ MAP_USER); } +void GLAPIENTRY +_mesa_FlushMappedBufferRange(GLenum target, GLintptr offset, + GLsizeiptr length) +{ + GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; + + bufObj = get_buffer(ctx, "glFlushMappedBufferRange", target, + GL_INVALID_OPERATION); + if (!bufObj) + return; + + _mesa_flush_mapped_buffer_range(ctx, bufObj, offset, length, + "glFlushMappedBufferRange"); +} + +void GLAPIENTRY +_mesa_FlushMappedNamedBufferRange(GLuint buffer, GLintptr offset, + GLsizeiptr length) +{ + 
GET_CURRENT_CONTEXT(ctx); + struct gl_buffer_object *bufObj; + + bufObj = _mesa_lookup_bufferobj_err(ctx, buffer, + "glFlushMappedNamedBufferRange"); + if (!bufObj) + return; + + _mesa_flush_mapped_buffer_range(ctx, bufObj, offset, length, + "glFlushMappedNamedBufferRange"); +} + static GLenum buffer_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option) diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/bufferobj.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/bufferobj.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/bufferobj.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/bufferobj.h 2015-03-18 08:43:35.000000000 +0000 @@ -130,15 +130,62 @@ _mesa_init_buffer_object_functions(struct dd_function_table *driver); extern void +_mesa_buffer_storage(struct gl_context *ctx, struct gl_buffer_object *bufObj, + GLenum target, GLsizeiptr size, const GLvoid *data, + GLbitfield flags, const char *func); + +extern void +_mesa_buffer_data(struct gl_context *ctx, struct gl_buffer_object *bufObj, + GLenum target, GLsizeiptr size, const GLvoid *data, + GLenum usage, const char *func); + +extern void +_mesa_buffer_sub_data(struct gl_context *ctx, struct gl_buffer_object *bufObj, + GLintptr offset, GLsizeiptr size, const GLvoid *data, + const char *func); + +extern void _mesa_buffer_unmap_all_mappings(struct gl_context *ctx, struct gl_buffer_object *bufObj); extern void -_mesa_buffer_clear_subdata(struct gl_context *ctx, - GLintptr offset, GLsizeiptr size, - const GLvoid *clearValue, - GLsizeiptr clearValueSize, - struct gl_buffer_object *bufObj); +_mesa_copy_buffer_sub_data(struct gl_context *ctx, + struct gl_buffer_object *src, + struct gl_buffer_object *dst, + GLintptr readOffset, GLintptr writeOffset, + GLsizeiptr size, const char *func); + +extern void * +_mesa_map_buffer_range(struct gl_context *ctx, + struct gl_buffer_object *bufObj, + GLintptr offset, GLsizeiptr length, + GLbitfield access, const char *func); 
+ +extern void +_mesa_flush_mapped_buffer_range(struct gl_context *ctx, + struct gl_buffer_object *bufObj, + GLintptr offset, GLsizeiptr length, + const char *func); + +extern void +_mesa_ClearBufferSubData_sw(struct gl_context *ctx, + GLintptr offset, GLsizeiptr size, + const GLvoid *clearValue, + GLsizeiptr clearValueSize, + struct gl_buffer_object *bufObj); + +extern void +_mesa_clear_buffer_sub_data(struct gl_context *ctx, + struct gl_buffer_object *bufObj, + GLenum internalformat, + GLintptr offset, GLsizeiptr size, + GLenum format, GLenum type, + const GLvoid *data, + const char *func, bool subdata); + +extern GLboolean +_mesa_unmap_buffer(struct gl_context *ctx, struct gl_buffer_object *bufObj, + const char *func); /* * API functions @@ -150,7 +197,10 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint * buffer); void GLAPIENTRY -_mesa_GenBuffers(GLsizei n, GLuint * buffer); +_mesa_GenBuffers(GLsizei n, GLuint *buffers); + +void GLAPIENTRY +_mesa_CreateBuffers(GLsizei n, GLuint *buffers); GLboolean GLAPIENTRY _mesa_IsBuffer(GLuint buffer); @@ -160,34 +210,61 @@ GLbitfield flags); void GLAPIENTRY -_mesa_BufferData(GLenum target, GLsizeiptrARB size, - const GLvoid * data, GLenum usage); +_mesa_NamedBufferStorage(GLuint buffer, GLsizeiptr size, const GLvoid *data, + GLbitfield flags); + +void GLAPIENTRY +_mesa_BufferData(GLenum target, GLsizeiptr size, + const GLvoid *data, GLenum usage); + +void GLAPIENTRY +_mesa_NamedBufferData(GLuint buffer, GLsizeiptr size, + const GLvoid *data, GLenum usage); void GLAPIENTRY -_mesa_BufferSubData(GLenum target, GLintptrARB offset, - GLsizeiptrARB size, const GLvoid * data); +_mesa_BufferSubData(GLenum target, GLintptr offset, + GLsizeiptr size, const GLvoid *data); void GLAPIENTRY -_mesa_GetBufferSubData(GLenum target, GLintptrARB offset, - GLsizeiptrARB size, void * data); +_mesa_NamedBufferSubData(GLuint buffer, GLintptr offset, + GLsizeiptr size, const GLvoid *data); + +void GLAPIENTRY +_mesa_GetBufferSubData(GLenum target, 
GLintptr offset, + GLsizeiptr size, GLvoid *data); + +void GLAPIENTRY +_mesa_GetNamedBufferSubData(GLuint buffer, GLintptr offset, + GLsizeiptr size, GLvoid *data); void GLAPIENTRY _mesa_ClearBufferData(GLenum target, GLenum internalformat, GLenum format, GLenum type, - const GLvoid * data); + const GLvoid *data); + +void GLAPIENTRY +_mesa_ClearNamedBufferData(GLuint buffer, GLenum internalformat, + GLenum format, GLenum type, + const GLvoid *data); void GLAPIENTRY _mesa_ClearBufferSubData(GLenum target, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type, - const GLvoid * data); + const GLvoid *data); -void * GLAPIENTRY -_mesa_MapBuffer(GLenum target, GLenum access); +void GLAPIENTRY +_mesa_ClearNamedBufferSubData(GLuint buffer, GLenum internalformat, + GLintptr offset, GLsizeiptr size, + GLenum format, GLenum type, + const GLvoid *data); GLboolean GLAPIENTRY _mesa_UnmapBuffer(GLenum target); +GLboolean GLAPIENTRY +_mesa_UnmapNamedBuffer(GLuint buffer); + void GLAPIENTRY _mesa_GetBufferParameteriv(GLenum target, GLenum pname, GLint *params); @@ -195,21 +272,52 @@ _mesa_GetBufferParameteri64v(GLenum target, GLenum pname, GLint64 *params); void GLAPIENTRY +_mesa_GetNamedBufferParameteriv(GLuint buffer, GLenum pname, GLint *params); + +void GLAPIENTRY +_mesa_GetNamedBufferParameteri64v(GLuint buffer, GLenum pname, + GLint64 *params); + +void GLAPIENTRY _mesa_GetBufferPointerv(GLenum target, GLenum pname, GLvoid **params); void GLAPIENTRY +_mesa_GetNamedBufferPointerv(GLuint buffer, GLenum pname, GLvoid **params); + + +void GLAPIENTRY _mesa_CopyBufferSubData(GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); +void GLAPIENTRY +_mesa_CopyNamedBufferSubData(GLuint readBuffer, GLuint writeBuffer, + GLintptr readOffset, GLintptr writeOffset, + GLsizeiptr size); + void * GLAPIENTRY _mesa_MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +void * GLAPIENTRY 
+_mesa_MapNamedBufferRange(GLuint buffer, GLintptr offset, GLsizeiptr length, + GLbitfield access); + +void * GLAPIENTRY +_mesa_MapBuffer(GLenum target, GLenum access); + +void * GLAPIENTRY +_mesa_MapNamedBuffer(GLuint buffer, GLenum access); + + void GLAPIENTRY _mesa_FlushMappedBufferRange(GLenum target, GLintptr offset, GLsizeiptr length); +void GLAPIENTRY +_mesa_FlushMappedNamedBufferRange(GLuint buffer, GLintptr offset, + GLsizeiptr length); + GLenum GLAPIENTRY _mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option); diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/compiler.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/compiler.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/compiler.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/compiler.h 2015-03-18 08:43:35.000000000 +0000 @@ -57,9 +57,6 @@ # elif !defined(__sparc__) && defined(__sparc) # define __sparc__ # endif -# if !defined(__volatile) -# define __volatile volatile -# endif #endif @@ -101,14 +98,6 @@ #elif defined(__APPLE__) #include #define CPU_TO_LE32( x ) CFSwapInt32HostToLittle( x ) -#elif (defined(_AIX)) -static inline GLuint CPU_TO_LE32(GLuint x) -{ - return (((x & 0x000000ff) << 24) | - ((x & 0x0000ff00) << 8) | - ((x & 0x00ff0000) >> 8) | - ((x & 0xff000000) >> 24)); -} #elif defined(__OpenBSD__) #include #define CPU_TO_LE32( x ) htole32( x ) @@ -125,34 +114,6 @@ -/** - * Create a macro so that asm functions can be linked into compilers other - * than GNU C - */ -#ifndef _ASMAPI -#if defined(_WIN32) -#define _ASMAPI __cdecl -#else -#define _ASMAPI -#endif -#ifdef PTR_DECL_IN_FRONT -#define _ASMAPIP * _ASMAPI -#else -#define _ASMAPIP _ASMAPI * -#endif -#endif - - -/** - * LONGSTRING macro - * gcc -pedantic warns about long string literals, LONGSTRING silences that. 
- */ -#if !defined(__GNUC__) -# define LONGSTRING -#else -# define LONGSTRING __extension__ -#endif - #define IEEE_ONE 0x3f800000 diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/copyimage.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/copyimage.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/copyimage.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/copyimage.c 2015-03-18 08:43:35.000000000 +0000 @@ -33,6 +33,12 @@ #include "texobj.h" #include "fbobject.h" #include "textureview.h" +#include "glformats.h" + +enum mesa_block_class { + BLOCK_CLASS_128_BITS, + BLOCK_CLASS_64_BITS +}; static bool prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level, @@ -253,6 +259,124 @@ return true; } +static bool +compressed_format_compatible(struct gl_context *ctx, + GLenum compressedFormat, GLenum otherFormat) +{ + enum mesa_block_class compressedClass, otherClass; + + /* Two view-incompatible compressed formats are never compatible. 
*/ + if (_mesa_is_compressed_format(ctx, otherFormat)) { + return false; + } + + /* + * From ARB_copy_image spec: + * Table 4.X.1 (Compatible internal formats for copying between + * compressed and uncompressed internal formats) + * --------------------------------------------------------------------- + * | Texel / | Uncompressed | | + * | Block | internal format | Compressed internal format | + * | size | | | + * --------------------------------------------------------------------- + * | 128-bit | RGBA32UI, | COMPRESSED_RGBA_S3TC_DXT3_EXT, | + * | | RGBA32I, | COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,| + * | | RGBA32F | COMPRESSED_RGBA_S3TC_DXT5_EXT, | + * | | | COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,| + * | | | COMPRESSED_RG_RGTC2, | + * | | | COMPRESSED_SIGNED_RG_RGTC2, | + * | | | COMPRESSED_RGBA_BPTC_UNORM, | + * | | | COMPRESSED_SRGB_ALPHA_BPTC_UNORM, | + * | | | COMPRESSED_RGB_BPTC_SIGNED_FLOAT, | + * | | | COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT | + * --------------------------------------------------------------------- + * | 64-bit | RGBA16F, RG32F, | COMPRESSED_RGB_S3TC_DXT1_EXT, | + * | | RGBA16UI, RG32UI, | COMPRESSED_SRGB_S3TC_DXT1_EXT, | + * | | RGBA16I, RG32I, | COMPRESSED_RGBA_S3TC_DXT1_EXT, | + * | | RGBA16, | COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,| + * | | RGBA16_SNORM | COMPRESSED_RED_RGTC1, | + * | | | COMPRESSED_SIGNED_RED_RGTC1 | + * --------------------------------------------------------------------- + */ + + switch (compressedFormat) { + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + case GL_COMPRESSED_RG_RGTC2: + case GL_COMPRESSED_SIGNED_RG_RGTC2: + case GL_COMPRESSED_RGBA_BPTC_UNORM: + case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM: + case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT: + case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT: + compressedClass = BLOCK_CLASS_128_BITS; + break; + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + case 
GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + case GL_COMPRESSED_RED_RGTC1: + case GL_COMPRESSED_SIGNED_RED_RGTC1: + compressedClass = BLOCK_CLASS_64_BITS; + break; + default: + return false; + } + + switch (otherFormat) { + case GL_RGBA32UI: + case GL_RGBA32I: + case GL_RGBA32F: + otherClass = BLOCK_CLASS_128_BITS; + break; + case GL_RGBA16F: + case GL_RG32F: + case GL_RGBA16UI: + case GL_RG32UI: + case GL_RGBA16I: + case GL_RG32I: + case GL_RGBA16: + case GL_RGBA16_SNORM: + otherClass = BLOCK_CLASS_64_BITS; + break; + default: + return false; + } + + return compressedClass == otherClass; +} + +static bool +copy_format_compatible(struct gl_context *ctx, + GLenum srcFormat, GLenum dstFormat) +{ + /* + * From ARB_copy_image spec: + * For the purposes of CopyImageSubData, two internal formats + * are considered compatible if any of the following conditions are + * met: + * * the formats are the same, + * * the formats are considered compatible according to the + * compatibility rules used for texture views as defined in + * section 3.9.X. In particular, if both internal formats are listed + * in the same entry of Table 3.X.2, they are considered compatible, or + * * one format is compressed and the other is uncompressed and + * Table 4.X.1 lists the two formats in the same row. + */ + + if (_mesa_texture_view_compatible_format(ctx, srcFormat, dstFormat)) { + /* Also checks if formats are equal. 
*/ + return true; + } else if (_mesa_is_compressed_format(ctx, srcFormat)) { + return compressed_format_compatible(ctx, srcFormat, dstFormat); + } else if (_mesa_is_compressed_format(ctx, dstFormat)) { + return compressed_format_compatible(ctx, dstFormat, srcFormat); + } + + return false; +} + void GLAPIENTRY _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, @@ -265,7 +389,7 @@ struct gl_texture_object *srcTexObj, *dstTexObj; struct gl_texture_image *srcTexImage, *dstTexImage; GLuint src_bw, src_bh, dst_bw, dst_bh; - int i, srcNewZ, dstNewZ, Bpt; + int i, srcNewZ, dstNewZ; if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glCopyImageSubData(%u, %s, %d, %d, %d, %d, " @@ -306,15 +430,6 @@ goto cleanup; } - /* Very simple sanity check. This is sufficient if one of the textures - * is compressed. */ - Bpt = _mesa_get_format_bytes(srcTexImage->TexFormat); - if (_mesa_get_format_bytes(dstTexImage->TexFormat) != Bpt) { - _mesa_error(ctx, GL_INVALID_VALUE, - "glCopyImageSubData(internalFormat mismatch)"); - goto cleanup; - } - if (!check_region_bounds(ctx, srcTexImage, srcX, srcY, srcZ, srcWidth, srcHeight, srcDepth, "src")) goto cleanup; @@ -324,17 +439,11 @@ (srcHeight / src_bh) * dst_bh, srcDepth, "dst")) goto cleanup; - if (_mesa_is_format_compressed(srcTexImage->TexFormat)) { - /* XXX: Technically, we should probaby do some more specific checking - * here. However, this should be sufficient for all compressed - * formats that mesa supports since it is a direct memory copy. 
- */ - } else if (_mesa_is_format_compressed(dstTexImage->TexFormat)) { - } else if (_mesa_texture_view_compatible_format(ctx, - srcTexImage->InternalFormat, - dstTexImage->InternalFormat)) { - } else { - return; /* Error logged by _mesa_texture_view_compatible_format */ + if (!copy_format_compatible(ctx, srcTexImage->InternalFormat, + dstTexImage->InternalFormat)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCopyImageSubData(internalFormat mismatch)"); + goto cleanup; } for (i = 0; i < srcDepth; ++i) { diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/imports.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/imports.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/imports.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/imports.c 2015-03-18 08:43:35.000000000 +0000 @@ -481,24 +481,6 @@ /** \name String */ /*@{*/ -/** - * Implemented using malloc() and strcpy. - * Note that NULL is handled accordingly. - */ -char * -_mesa_strdup( const char *s ) -{ - if (s) { - size_t l = strlen(s); - char *s2 = malloc(l + 1); - if (s2) - strcpy(s2, s); - return s2; - } - else { - return NULL; - } -} /** Compute simple checksum/hash for a string */ unsigned int diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/imports.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/imports.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/imports.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/imports.h 2015-03-18 08:43:35.000000000 +0000 @@ -448,9 +448,6 @@ return h & 0x8000; } -extern char * -_mesa_strdup( const char *s ); - extern unsigned int _mesa_str_checksum(const char *str); diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/objectlabel.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/objectlabel.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/objectlabel.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/objectlabel.c 2015-03-18 
08:43:35.000000000 +0000 @@ -76,7 +76,7 @@ MAX_LABEL_LENGTH); /* null-terminated string */ - *labelPtr = _mesa_strdup(label); + *labelPtr = strdup(label); } } } diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/pbo.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/pbo.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/pbo.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/pbo.c 2015-03-18 08:43:35.000000000 +0000 @@ -80,7 +80,7 @@ */ if (!_mesa_is_bufferobj(pack->BufferObj)) { offset = 0; - size = clientMemSize; + size = (clientMemSize == INT_MAX) ? UINTPTR_MAX : clientMemSize; } else { offset = (uintptr_t)ptr; size = pack->BufferObj->Size; @@ -164,23 +164,18 @@ return buf; } - /** - * Combine PBO-read validation and mapping. - * If any GL errors are detected, they'll be recorded and NULL returned. + * Perform PBO validation for read operations with uncompressed textures. + * If any GL errors are detected, false is returned, otherwise returns true. * \sa _mesa_validate_pbo_access - * \sa _mesa_map_pbo_source - * A call to this function should have a matching call to - * _mesa_unmap_pbo_source(). 
*/ -const GLvoid * -_mesa_map_validate_pbo_source(struct gl_context *ctx, - GLuint dimensions, - const struct gl_pixelstore_attrib *unpack, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - GLsizei clientMemSize, - const GLvoid *ptr, const char *where) +bool +_mesa_validate_pbo_source(struct gl_context *ctx, GLuint dimensions, + const struct gl_pixelstore_attrib *unpack, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + GLsizei clientMemSize, + const GLvoid *ptr, const char *where) { assert(dimensions == 1 || dimensions == 2 || dimensions == 3); @@ -188,24 +183,85 @@ format, type, clientMemSize, ptr)) { if (_mesa_is_bufferobj(unpack->BufferObj)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(out of bounds PBO access)", where); + "%s(out of bounds PBO access)", + where); } else { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(out of bounds access: bufSize (%d) is too small)", where, clientMemSize); } - return NULL; + return false; } if (!_mesa_is_bufferobj(unpack->BufferObj)) { /* non-PBO access: no further validation to be done */ - return ptr; + return true; } if (_mesa_check_disallowed_mapping(unpack->BufferObj)) { /* buffer is already mapped - that's an error */ - _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)", where); - return NULL; + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)", + where); + return false; + } + + return true; +} + +/** + * Perform PBO validation for read operations with compressed textures. + * If any GL errors are detected, false is returned, otherwise returns true. 
+ */ +bool +_mesa_validate_pbo_source_compressed(struct gl_context *ctx, GLuint dimensions, + const struct gl_pixelstore_attrib *unpack, + GLsizei imageSize, const GLvoid *pixels, + const char *where) +{ + if (!_mesa_is_bufferobj(unpack->BufferObj)) { + /* not using a PBO */ + return true; + } + + if ((const GLubyte *) pixels + imageSize > + ((const GLubyte *) 0) + unpack->BufferObj->Size) { + /* out of bounds read! */ + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid PBO access)", + where); + return false; + } + + if (_mesa_check_disallowed_mapping(unpack->BufferObj)) { + /* buffer is already mapped - that's an error */ + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)", + where); + return false; + } + + return true; +} + +/** + * Perform PBO-read mapping. + * If any GL errors are detected, they'll be recorded and NULL returned. + * \sa _mesa_validate_pbo_source + * \sa _mesa_map_pbo_source + * A call to this function should have a matching call to + * _mesa_unmap_pbo_source(). + */ +const GLvoid * +_mesa_map_validate_pbo_source(struct gl_context *ctx, + GLuint dimensions, + const struct gl_pixelstore_attrib *unpack, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + GLsizei clientMemSize, + const GLvoid *ptr, const char *where) +{ + if (!_mesa_validate_pbo_source(ctx, dimensions, unpack, + width, height, depth, format, type, + clientMemSize, ptr, where)) { + return NULL; } ptr = _mesa_map_pbo_source(ctx, unpack, ptr); @@ -381,28 +437,27 @@ { GLubyte *buf; + if (!_mesa_validate_pbo_source_compressed(ctx, dimensions, packing, + imageSize, pixels, funcName)) { + /* error is already set during validation */ + return NULL; + } + if (!_mesa_is_bufferobj(packing->BufferObj)) { /* not using a PBO - return pointer unchanged */ return pixels; } - if ((const GLubyte *) pixels + imageSize > - ((const GLubyte *) 0) + packing->BufferObj->Size) { - /* out of bounds read! 
*/ - _mesa_error(ctx, GL_INVALID_OPERATION, "%s%uD(invalid PBO access)", - funcName, dimensions); - return NULL; - } buf = (GLubyte*) ctx->Driver.MapBufferRange(ctx, 0, packing->BufferObj->Size, GL_MAP_READ_BIT, packing->BufferObj, MAP_INTERNAL); - if (!buf) { - _mesa_error(ctx, GL_INVALID_OPERATION, "%s%uD(PBO is mapped)", funcName, - dimensions); - return NULL; - } + + /* Validation above already checked that PBO is not mapped, so buffer + * should not be null. + */ + assert(buf); return ADD_POINTERS(buf, pixels); } diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/pbo.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/pbo.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/pbo.h 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/pbo.h 2015-03-18 08:43:35.000000000 +0000 @@ -92,4 +92,18 @@ const struct gl_pixelstore_attrib *unpack); +extern bool +_mesa_validate_pbo_source(struct gl_context *ctx, GLuint dimensions, + const struct gl_pixelstore_attrib *unpack, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + GLsizei clientMemSize, + const GLvoid *ptr, const char *where); + +extern bool +_mesa_validate_pbo_source_compressed(struct gl_context *ctx, GLuint dimensions, + const struct gl_pixelstore_attrib *unpack, + GLsizei imageSize, const GLvoid *ptr, + const char *where); + #endif diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/querymatrix.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/querymatrix.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/querymatrix.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/querymatrix.c 2015-03-18 08:43:35.000000000 +0000 @@ -13,7 +13,7 @@ #include -#include +#include "c99_math.h" #include "glheader.h" #include "querymatrix.h" #include "main/get.h" @@ -37,169 +37,120 @@ #define INT_TO_FIXED(x) ((GLfixed) ((x) << 16)) #define FLOAT_TO_FIXED(x) ((GLfixed) ((x) * 65536.0)) -#if defined(fpclassify) -/* ISO C99 
says that fpclassify is a macro. Assume that any implementation - * of fpclassify, whether it's in a C99 compiler or not, will be a macro. - */ -#elif defined(_MSC_VER) -/* Not required on VS2013 and above. */ -/* Oddly, the fpclassify() function doesn't exist in such a form - * on MSVC. This is an implementation using slightly different - * lower-level Windows functions. - */ -#include - -enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL} -fpclassify(double x) -{ - switch(_fpclass(x)) { - case _FPCLASS_SNAN: /* signaling NaN */ - case _FPCLASS_QNAN: /* quiet NaN */ - return FP_NAN; - case _FPCLASS_NINF: /* negative infinity */ - case _FPCLASS_PINF: /* positive infinity */ - return FP_INFINITE; - case _FPCLASS_NN: /* negative normal */ - case _FPCLASS_PN: /* positive normal */ - return FP_NORMAL; - case _FPCLASS_ND: /* negative denormalized */ - case _FPCLASS_PD: /* positive denormalized */ - return FP_SUBNORMAL; - case _FPCLASS_NZ: /* negative zero */ - case _FPCLASS_PZ: /* positive zero */ - return FP_ZERO; - default: - /* Should never get here; but if we do, this will guarantee - * that the pattern is not treated like a number. - */ - return FP_NAN; - } -} - -#else - -enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL} -fpclassify(double x) -{ - /* XXX do something better someday */ - return FP_NORMAL; -} - -#endif -GLbitfield GLAPIENTRY _mesa_QueryMatrixxOES(GLfixed mantissa[16], GLint exponent[16]) +GLbitfield GLAPIENTRY +_mesa_QueryMatrixxOES(GLfixed mantissa[16], GLint exponent[16]) { - GLfloat matrix[16]; - GLint tmp; - GLenum currentMode = GL_FALSE; - GLenum desiredMatrix = GL_FALSE; - /* The bitfield returns 1 for each component that is invalid (i.e. - * NaN or Inf). In case of error, everything is invalid. - */ - GLbitfield rv; - register unsigned int i; - unsigned int bit; - - /* This data structure defines the mapping between the current matrix - * mode and the desired matrix identifier. 
- */ - static struct { - GLenum currentMode; - GLenum desiredMatrix; - } modes[] = { - {GL_MODELVIEW, GL_MODELVIEW_MATRIX}, - {GL_PROJECTION, GL_PROJECTION_MATRIX}, - {GL_TEXTURE, GL_TEXTURE_MATRIX}, - }; - - /* Call Mesa to get the current matrix in floating-point form. First, - * we have to figure out what the current matrix mode is. - */ - _mesa_GetIntegerv(GL_MATRIX_MODE, &tmp); - currentMode = (GLenum) tmp; - - /* The mode is either GL_FALSE, if for some reason we failed to query - * the mode, or a given mode from the above table. Search for the - * returned mode to get the desired matrix; if we don't find it, - * we can return immediately, as _mesa_GetInteger() will have - * logged the necessary error already. - */ - for (i = 0; i < sizeof(modes)/sizeof(modes[0]); i++) { - if (modes[i].currentMode == currentMode) { - desiredMatrix = modes[i].desiredMatrix; - break; - } - } - if (desiredMatrix == GL_FALSE) { - /* Early error means all values are invalid. */ - return 0xffff; - } - - /* Now pull the matrix itself. */ - _mesa_GetFloatv(desiredMatrix, matrix); - - rv = 0; - for (i = 0, bit = 1; i < 16; i++, bit<<=1) { - float normalizedFraction; - int exp; - - switch (fpclassify(matrix[i])) { - /* A "subnormal" or denormalized number is too small to be - * represented in normal format; but despite that it's a - * valid floating point number. FP_ZERO and FP_NORMAL - * are both valid as well. We should be fine treating - * these three cases as legitimate floating-point numbers. - */ - case FP_SUBNORMAL: - case FP_NORMAL: - case FP_ZERO: - normalizedFraction = (GLfloat)frexp(matrix[i], &exp); - mantissa[i] = FLOAT_TO_FIXED(normalizedFraction); - exponent[i] = (GLint) exp; - break; - - /* If the entry is not-a-number or an infinity, then the - * matrix component is invalid. The invalid flag for - * the component is already set; might as well set the - * other return values to known values. 
We'll set - * distinct values so that a savvy end user could determine - * whether the matrix component was a NaN or an infinity, - * but this is more useful for debugging than anything else - * since the standard doesn't specify any such magic - * values to return. - */ - case FP_NAN: - mantissa[i] = INT_TO_FIXED(0); - exponent[i] = (GLint) 0; - rv |= bit; - break; - - case FP_INFINITE: - /* Return +/- 1 based on whether it's a positive or - * negative infinity. - */ - if (matrix[i] > 0) { - mantissa[i] = INT_TO_FIXED(1); - } - else { - mantissa[i] = -INT_TO_FIXED(1); - } - exponent[i] = (GLint) 0; - rv |= bit; - break; - - /* We should never get here; but here's a catching case - * in case fpclassify() is returnings something unexpected. - */ - default: - mantissa[i] = INT_TO_FIXED(2); - exponent[i] = (GLint) 0; - rv |= bit; - break; - } + GLfloat matrix[16]; + GLint tmp; + GLenum currentMode = GL_FALSE; + GLenum desiredMatrix = GL_FALSE; + /* The bitfield returns 1 for each component that is invalid (i.e. + * NaN or Inf). In case of error, everything is invalid. + */ + GLbitfield rv; + unsigned i, bit; + + /* This data structure defines the mapping between the current matrix + * mode and the desired matrix identifier. + */ + static const struct { + GLenum currentMode; + GLenum desiredMatrix; + } modes[] = { + {GL_MODELVIEW, GL_MODELVIEW_MATRIX}, + {GL_PROJECTION, GL_PROJECTION_MATRIX}, + {GL_TEXTURE, GL_TEXTURE_MATRIX}, + }; + + /* Call Mesa to get the current matrix in floating-point form. First, + * we have to figure out what the current matrix mode is. + */ + _mesa_GetIntegerv(GL_MATRIX_MODE, &tmp); + currentMode = (GLenum) tmp; + + /* The mode is either GL_FALSE, if for some reason we failed to query + * the mode, or a given mode from the above table. Search for the + * returned mode to get the desired matrix; if we don't find it, + * we can return immediately, as _mesa_GetInteger() will have + * logged the necessary error already. 
+ */ + for (i = 0; i < ARRAY_SIZE(modes); i++) { + if (modes[i].currentMode == currentMode) { + desiredMatrix = modes[i].desiredMatrix; + break; + } + } + if (desiredMatrix == GL_FALSE) { + /* Early error means all values are invalid. */ + return 0xffff; + } + + /* Now pull the matrix itself. */ + _mesa_GetFloatv(desiredMatrix, matrix); + + rv = 0; + for (i = 0, bit = 1; i < 16; i++, bit<<=1) { + float normalizedFraction; + int exp; + + switch (fpclassify(matrix[i])) { + case FP_SUBNORMAL: + case FP_NORMAL: + case FP_ZERO: + /* A "subnormal" or denormalized number is too small to be + * represented in normal format; but despite that it's a + * valid floating point number. FP_ZERO and FP_NORMAL + * are both valid as well. We should be fine treating + * these three cases as legitimate floating-point numbers. + */ + normalizedFraction = (GLfloat)frexp(matrix[i], &exp); + mantissa[i] = FLOAT_TO_FIXED(normalizedFraction); + exponent[i] = (GLint) exp; + break; + + case FP_NAN: + /* If the entry is not-a-number or an infinity, then the + * matrix component is invalid. The invalid flag for + * the component is already set; might as well set the + * other return values to known values. We'll set + * distinct values so that a savvy end user could determine + * whether the matrix component was a NaN or an infinity, + * but this is more useful for debugging than anything else + * since the standard doesn't specify any such magic + * values to return. + */ + mantissa[i] = INT_TO_FIXED(0); + exponent[i] = (GLint) 0; + rv |= bit; + break; + + case FP_INFINITE: + /* Return +/- 1 based on whether it's a positive or + * negative infinity. + */ + if (matrix[i] > 0) { + mantissa[i] = INT_TO_FIXED(1); + } + else { + mantissa[i] = -INT_TO_FIXED(1); + } + exponent[i] = (GLint) 0; + rv |= bit; + break; + + default: + /* We should never get here; but here's a catching case + * in case fpclassify() is returnings something unexpected. 
+ */ + mantissa[i] = INT_TO_FIXED(2); + exponent[i] = (GLint) 0; + rv |= bit; + break; + } - } /* for each component */ + } /* for each component */ - /* All done */ - return rv; + /* All done */ + return rv; } diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/shaderapi.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/shaderapi.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/shaderapi.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/shaderapi.c 2015-03-18 08:43:35.000000000 +0000 @@ -1460,7 +1460,7 @@ fclose(f); - shader = _mesa_strdup(buffer); + shader = strdup(buffer); free(buffer); return shader; diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/tests/dispatch_sanity.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/tests/dispatch_sanity.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/tests/dispatch_sanity.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/tests/dispatch_sanity.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -953,6 +953,21 @@ { "glClipControl", 45, -1 }, /* GL_ARB_direct_state_access */ + { "glCreateBuffers", 45, -1 }, + { "glNamedBufferStorage", 45, -1 }, + { "glNamedBufferData", 45, -1 }, + { "glNamedBufferSubData", 45, -1 }, + { "glCopyNamedBufferSubData", 45, -1 }, + { "glClearNamedBufferData", 45, -1 }, + { "glClearNamedBufferSubData", 45, -1 }, + { "glMapNamedBuffer", 45, -1 }, + { "glMapNamedBufferRange", 45, -1 }, + { "glUnmapNamedBuffer", 45, -1 }, + { "glFlushMappedNamedBufferRange", 45, -1 }, + { "glGetNamedBufferParameteriv", 45, -1 }, + { "glGetNamedBufferParameteri64v", 45, -1 }, + { "glGetNamedBufferPointerv", 45, -1 }, + { "glGetNamedBufferSubData", 45, -1 }, { "glCreateTextures", 45, -1 }, { "glTextureStorage1D", 45, -1 }, { "glTextureStorage2D", 45, -1 }, diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/teximage.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/teximage.c --- 
mesa-10.6.0~git20150310.5750595c/src/mesa/main/teximage.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/teximage.c 2015-03-18 08:43:35.000000000 +0000 @@ -53,6 +53,7 @@ #include "mtypes.h" #include "glformats.h" #include "texstore.h" +#include "pbo.h" /** @@ -1619,32 +1620,30 @@ /* Check size */ if (subWidth < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "%s%dD(width=%d)", func, dims, subWidth); + "%s(width=%d)", func, subWidth); return GL_TRUE; } if (dims > 1 && subHeight < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "%s%dD(height=%d)", func, dims, subHeight); + "%s(height=%d)", func, subHeight); return GL_TRUE; } if (dims > 2 && subDepth < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "%s%dD(depth=%d)", func, dims, subDepth); + "%s(depth=%d)", func, subDepth); return GL_TRUE; } /* check xoffset and width */ if (xoffset < - (GLint) destImage->Border) { - _mesa_error(ctx, GL_INVALID_VALUE, "%s%dD(xoffset)", - func, dims); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset)", func); return GL_TRUE; } if (xoffset + subWidth > (GLint) destImage->Width) { - _mesa_error(ctx, GL_INVALID_VALUE, "%s%dD(xoffset+width)", - func, dims); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset+width)", func); return GL_TRUE; } @@ -1652,13 +1651,11 @@ if (dims > 1) { GLint yBorder = (target == GL_TEXTURE_1D_ARRAY) ? 
0 : destImage->Border; if (yoffset < -yBorder) { - _mesa_error(ctx, GL_INVALID_VALUE, "%s%dD(yoffset)", - func, dims); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(yoffset)", func); return GL_TRUE; } if (yoffset + subHeight > (GLint) destImage->Height) { - _mesa_error(ctx, GL_INVALID_VALUE, "%s%dD(yoffset+height)", - func, dims); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(yoffset+height)", func); return GL_TRUE; } } @@ -1671,7 +1668,7 @@ 0 : destImage->Border; if (zoffset < -zBorder) { - _mesa_error(ctx, GL_INVALID_VALUE, "%s3D(zoffset)", func); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(zoffset)", func); return GL_TRUE; } @@ -1679,7 +1676,7 @@ if (target == GL_TEXTURE_CUBE_MAP) depth = 6; if (zoffset + subDepth > depth) { - _mesa_error(ctx, GL_INVALID_VALUE, "%s3D(zoffset+depth)", func); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(zoffset+depth)", func); return GL_TRUE; } } @@ -1697,8 +1694,8 @@ /* offset must be multiple of block size */ if ((xoffset % bw != 0) || (yoffset % bh != 0)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "%s%dD(xoffset = %d, yoffset = %d)", - func, dims, xoffset, yoffset); + "%s(xoffset = %d, yoffset = %d)", + func, xoffset, yoffset); return GL_TRUE; } @@ -1710,14 +1707,14 @@ if ((subWidth % bw != 0) && (xoffset + subWidth != (GLint) destImage->Width)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "%s%dD(width = %d)", func, dims, subWidth); + "%s(width = %d)", func, subWidth); return GL_TRUE; } if ((subHeight % bh != 0) && (yoffset + subHeight != (GLint) destImage->Height)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "%s%dD(height = %d)", func, dims, subHeight); + "%s(height = %d)", func, subHeight); return GL_TRUE; } } @@ -2113,7 +2110,8 @@ GLint level, GLint internalFormat, GLenum format, GLenum type, GLint width, GLint height, - GLint depth, GLint border ) + GLint depth, GLint border, + const GLvoid *pixels ) { GLenum err; @@ -2198,6 +2196,13 @@ return GL_TRUE; } + /* validate the bound PBO, if any */ + if (!_mesa_validate_pbo_source(ctx, dimensions, 
&ctx->Unpack, + width, height, depth, format, type, + INT_MAX, pixels, "glTexImage")) { + return GL_TRUE; + } + /* make sure internal format and format basically agree */ if (!texture_formats_agree(internalFormat, format)) { _mesa_error(ctx, GL_INVALID_OPERATION, @@ -2294,7 +2299,7 @@ GLenum target, GLint level, GLenum internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLint border, - GLsizei imageSize) + GLsizei imageSize, const GLvoid *data) { const GLint maxLevels = _mesa_max_texture_levels(ctx, target); GLint expectedSize; @@ -2322,6 +2327,13 @@ return GL_TRUE; } + /* validate the bound PBO, if any */ + if (!_mesa_validate_pbo_source_compressed(ctx, dimensions, &ctx->Unpack, + imageSize, data, + "glCompressedTexImage")) { + return GL_TRUE; + } + switch (internalFormat) { case GL_PALETTE4_RGB8_OES: case GL_PALETTE4_RGBA8_OES: @@ -2454,30 +2466,28 @@ GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint width, GLint height, GLint depth, - GLenum format, GLenum type, bool dsa) + GLenum format, GLenum type, const GLvoid *pixels, + bool dsa, const char *callerName) { struct gl_texture_image *texImage; GLenum err; - const char* suffix = dsa ? 
"ture" : ""; if (!texObj) { /* must be out of memory */ - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTex%sSubImage%dD()", - suffix, dimensions); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s()", callerName); return GL_TRUE; } /* check target (proxies not allowed) */ if (!legal_texsubimage_target(ctx, dimensions, target, dsa)) { - _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sSubImage%uD(target=%s)", - suffix, dimensions, _mesa_lookup_enum_by_nr(target)); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(target=%s)", + callerName, _mesa_lookup_enum_by_nr(target)); return GL_TRUE; } /* level check */ if (level < 0 || level >= _mesa_max_texture_levels(ctx, target)) { - _mesa_error(ctx, GL_INVALID_VALUE, "glTex%sSubImage%uD(level=%d)", - suffix, dimensions, level); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(level=%d)", callerName, level); return GL_TRUE; } @@ -2489,9 +2499,8 @@ if (_mesa_is_gles(ctx) && !_mesa_is_gles3(ctx)) { err = _mesa_es_error_check_format_and_type(format, type, dimensions); if (err != GL_NO_ERROR) { - _mesa_error(ctx, err, - "glTex%sSubImage%dD(format = %s, type = %s)", - suffix, dimensions, _mesa_lookup_enum_by_nr(format), + _mesa_error(ctx, err, "%s(format = %s, type = %s)", + callerName, _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type)); return GL_TRUE; } @@ -2500,34 +2509,37 @@ err = _mesa_error_check_format_and_type(ctx, format, type); if (err != GL_NO_ERROR) { _mesa_error(ctx, err, - "glTex%sSubImage%dD(incompatible format = %s, type = %s)", - suffix, dimensions, _mesa_lookup_enum_by_nr(format), + "%s(incompatible format = %s, type = %s)", + callerName, _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type)); return GL_TRUE; } + /* validate the bound PBO, if any */ + if (!_mesa_validate_pbo_source(ctx, dimensions, &ctx->Unpack, + width, height, depth, format, type, + INT_MAX, pixels, callerName)) { + return GL_TRUE; + } + texImage = _mesa_select_tex_image(texObj, target, level); if (!texImage) { /* non-existant texture level */ - 
_mesa_error(ctx, GL_INVALID_OPERATION, - "glTex%sSubImage%dD(invalid texture image)", suffix, - dimensions); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture image)", + callerName); return GL_TRUE; } if (error_check_subtexture_dimensions(ctx, dimensions, texImage, xoffset, yoffset, zoffset, - width, height, depth, - dsa ? "glTextureSubImage" : - "glTexSubImage")) { + width, height, depth, callerName)) { return GL_TRUE; } if (_mesa_is_format_compressed(texImage->TexFormat)) { if (compressedteximage_only_format(ctx, texImage->InternalFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glTex%sSubImage%dD(no compression for format)", - suffix, dimensions); + "%s(no compression for format)", callerName); return GL_TRUE; } } @@ -2537,8 +2549,7 @@ if (_mesa_is_format_integer_color(texImage->TexFormat) != _mesa_is_enum_format_integer(format)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glTex%sSubImage%dD(integer/non-integer format mismatch)", - suffix, dimensions); + "%s(integer/non-integer format mismatch)", callerName); return GL_TRUE; } } @@ -3218,12 +3229,13 @@ if (compressed_texture_error_check(ctx, dims, target, level, internalFormat, width, height, depth, - border, imageSize)) + border, imageSize, pixels)) return; } else { if (texture_error_check(ctx, dims, target, level, internalFormat, - format, type, width, height, depth, border)) + format, type, width, height, depth, border, + pixels)) return; } @@ -3562,7 +3574,8 @@ texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, const GLvoid *pixels) + GLenum format, GLenum type, const GLvoid *pixels, + const char *callerName) { struct gl_texture_object *texObj; struct gl_texture_image *texImage; @@ -3573,7 +3586,8 @@ if (texsubimage_error_check(ctx, dims, texObj, target, level, xoffset, yoffset, zoffset, - width, height, depth, format, type, false)) { + width, 
height, depth, format, type, + pixels, false, callerName)) { return; /* error was detected */ } @@ -3603,7 +3617,8 @@ GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, const GLvoid *pixels) + GLenum format, GLenum type, const GLvoid *pixels, + const char *callerName) { struct gl_texture_object *texObj; struct gl_texture_image *texImage; @@ -3627,7 +3642,8 @@ if (texsubimage_error_check(ctx, dims, texObj, texObj->Target, level, xoffset, yoffset, zoffset, - width, height, depth, format, type, true)) { + width, height, depth, format, type, + pixels, true, callerName)) { return; /* error was detected */ } @@ -3708,7 +3724,7 @@ texsubimage(ctx, 1, target, level, xoffset, 0, 0, width, 1, 1, - format, type, pixels); + format, type, pixels, "glTexSubImage1D"); } @@ -3723,7 +3739,7 @@ texsubimage(ctx, 2, target, level, xoffset, yoffset, 0, width, height, 1, - format, type, pixels); + format, type, pixels, "glTexSubImage2D"); } @@ -3739,7 +3755,7 @@ texsubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, width, height, depth, - format, type, pixels); + format, type, pixels, "glTexSubImage3D"); } void GLAPIENTRY @@ -3752,7 +3768,7 @@ texturesubimage(ctx, 1, texture, level, xoffset, 0, 0, width, 1, 1, - format, type, pixels); + format, type, pixels, "glTextureSubImage1D"); } @@ -3767,7 +3783,7 @@ texturesubimage(ctx, 2, texture, level, xoffset, yoffset, 0, width, height, 1, - format, type, pixels); + format, type, pixels, "glTextureSubImage2D"); } @@ -3782,7 +3798,7 @@ texturesubimage(ctx, 3, texture, level, xoffset, yoffset, zoffset, width, height, depth, - format, type, pixels); + format, type, pixels, "glTextureSubImage3D"); } @@ -4623,68 +4639,72 @@ GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLsizei imageSize, bool dsa) + GLenum format, GLsizei imageSize, + const GLvoid *data, 
const char *callerName) { struct gl_texture_image *texImage; GLint expectedSize; - const char *suffix = dsa ? "ture" : ""; /* this will catch any invalid compressed format token */ if (!_mesa_is_compressed_format(ctx, format)) { _mesa_error(ctx, GL_INVALID_ENUM, - "glCompressedTex%sSubImage%uD(format)", suffix, dims); + "%s(format)", callerName); return GL_TRUE; } if (level < 0 || level >= _mesa_max_texture_levels(ctx, target)) { _mesa_error(ctx, GL_INVALID_VALUE, - "glCompressedTex%sSubImage%uD(level=%d)", - suffix, dims, level); + "%s(level=%d)", + callerName, level); + return GL_TRUE; + } + + /* validate the bound PBO, if any */ + if (!_mesa_validate_pbo_source_compressed(ctx, dims, &ctx->Unpack, + imageSize, data, callerName)) { return GL_TRUE; } /* Check for invalid pixel storage modes */ if (!_mesa_compressed_pixel_storage_error_check(ctx, dims, - &ctx->Unpack, - dsa ? "glCompressedTextureSubImage" : - "glCompressedTexSubImage")) { + &ctx->Unpack, callerName)) { return GL_TRUE; } expectedSize = compressed_tex_size(width, height, depth, format); if (expectedSize != imageSize) { _mesa_error(ctx, GL_INVALID_VALUE, - "glCompressedTex%sSubImage%uD(size=%d)", - suffix, dims, imageSize); + "%s(size=%d)", + callerName, imageSize); return GL_TRUE; } texImage = _mesa_select_tex_image(texObj, target, level); if (!texImage) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glCompressedTex%sSubImage%uD(invalid texture image)", - suffix, dims); + "%s(invalid texture image)", + callerName); return GL_TRUE; } if ((GLint) format != texImage->InternalFormat) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glCompressedTex%sSubImage%uD(format=0x%x)", - suffix, dims, format); + "%s(format=0x%x)", + callerName, format); return GL_TRUE; } if (compressedteximage_only_format(ctx, format)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glCompressedTex%sSubImage%uD(format=0x%x cannot be updated)", - suffix, dims, format); + "%s(format=0x%x cannot be updated)", + callerName, format); return GL_TRUE; 
} if (error_check_subtexture_dimensions(ctx, dims, texImage, xoffset, yoffset, zoffset, width, height, depth, - "glCompressedTexSubImage")) { + callerName)) { return GL_TRUE; } @@ -4787,7 +4807,8 @@ if (compressed_subtexture_error_check(ctx, 1, texObj, target, level, xoffset, 0, 0, width, 1, 1, - format, imageSize, false)) { + format, imageSize, data, + "glCompressedTexSubImage1D")) { return; } @@ -4823,7 +4844,8 @@ if (compressed_subtexture_error_check(ctx, 1, texObj, texObj->Target, level, xoffset, 0, 0, width, 1, 1, - format, imageSize, true)) { + format, imageSize, data, + "glCompressedTextureSubImage1D")) { return; } @@ -4860,7 +4882,8 @@ if (compressed_subtexture_error_check(ctx, 2, texObj, target, level, xoffset, yoffset, 0, width, height, 1, - format, imageSize, false)) { + format, imageSize, data, + "glCompressedTexSubImage2D")) { return; } @@ -4899,7 +4922,8 @@ if (compressed_subtexture_error_check(ctx, 2, texObj, texObj->Target, level, xoffset, yoffset, 0, width, height, 1, - format, imageSize, true)) { + format, imageSize, data, + "glCompressedTextureSubImage2D")) { return; } @@ -4935,7 +4959,8 @@ if (compressed_subtexture_error_check(ctx, 3, texObj, target, level, xoffset, yoffset, zoffset, width, height, depth, - format, imageSize, false)) { + format, imageSize, data, + "glCompressedTexSubImage3D")) { return; } @@ -4975,7 +5000,8 @@ if (compressed_subtexture_error_check(ctx, 3, texObj, texObj->Target, level, xoffset, yoffset, zoffset, width, height, depth, - format, imageSize, true)) { + format, imageSize, data, + "glCompressedTextureSubImage3D")) { return; } diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/texobj.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/texobj.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/texobj.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/texobj.c 2015-03-18 08:43:35.000000000 +0000 @@ -879,10 +879,6 @@ if (texObj->Target != GL_TEXTURE_CUBE_MAP) return GL_FALSE; - /* 
Make sure we have enough image planes for a cube map. */ - if (texObj->NumLayers < 6) - return GL_FALSE; - if ((level < 0) || (level >= MAX_TEXTURE_LEVELS)) return GL_FALSE; diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/main/transformfeedback.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/transformfeedback.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/main/transformfeedback.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/main/transformfeedback.c 2015-03-18 08:43:35.000000000 +0000 @@ -762,7 +762,7 @@ /* Save the new names and the count */ for (i = 0; i < count; i++) { - shProg->TransformFeedback.VaryingNames[i] = _mesa_strdup(varyings[i]); + shProg->TransformFeedback.VaryingNames[i] = strdup(varyings[i]); } shProg->TransformFeedback.NumVarying = count; diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/math/m_clip_tmp.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/math/m_clip_tmp.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/math/m_clip_tmp.h 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/math/m_clip_tmp.h 2015-03-18 08:43:35.000000000 +0000 @@ -40,7 +40,7 @@ * \param andMask bitwise-AND of clipMask values * \return proj_vec pointer */ -static GLvector4f * _XFORMAPI TAG(cliptest_points4)( GLvector4f *clip_vec, +static GLvector4f * TAG(cliptest_points4)( GLvector4f *clip_vec, GLvector4f *proj_vec, GLubyte clipMask[], GLubyte *orMask, @@ -120,7 +120,7 @@ * \param andMask bitwise-AND of clipMask values * \return clip_vec pointer */ -static GLvector4f * _XFORMAPI TAG(cliptest_np_points4)( GLvector4f *clip_vec, +static GLvector4f * TAG(cliptest_np_points4)( GLvector4f *clip_vec, GLvector4f *proj_vec, GLubyte clipMask[], GLubyte *orMask, @@ -177,7 +177,7 @@ } -static GLvector4f * _XFORMAPI TAG(cliptest_points3)( GLvector4f *clip_vec, +static GLvector4f * TAG(cliptest_points3)( GLvector4f *clip_vec, GLvector4f *proj_vec, GLubyte clipMask[], GLubyte *orMask, @@ -213,7 +213,7 @@ } -static 
GLvector4f * _XFORMAPI TAG(cliptest_points2)( GLvector4f *clip_vec, +static GLvector4f * TAG(cliptest_points2)( GLvector4f *clip_vec, GLvector4f *proj_vec, GLubyte clipMask[], GLubyte *orMask, diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/math/m_norm_tmp.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/math/m_norm_tmp.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/math/m_norm_tmp.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/math/m_norm_tmp.h 2015-03-18 08:43:35.000000000 +0000 @@ -39,7 +39,7 @@ * optimization) * dest - the destination vector of normals */ -static void _XFORMAPI +static void TAG(transform_normalize_normals)( const GLmatrix *mat, GLfloat scale, const GLvector4f *in, @@ -106,7 +106,7 @@ } -static void _XFORMAPI +static void TAG(transform_normalize_normals_no_rot)( const GLmatrix *mat, GLfloat scale, const GLvector4f *in, @@ -171,7 +171,7 @@ } -static void _XFORMAPI +static void TAG(transform_rescale_normals_no_rot)( const GLmatrix *mat, GLfloat scale, const GLvector4f *in, @@ -200,7 +200,7 @@ } -static void _XFORMAPI +static void TAG(transform_rescale_normals)( const GLmatrix *mat, GLfloat scale, const GLvector4f *in, @@ -232,7 +232,7 @@ } -static void _XFORMAPI +static void TAG(transform_normals_no_rot)( const GLmatrix *mat, GLfloat scale, const GLvector4f *in, @@ -262,7 +262,7 @@ } -static void _XFORMAPI +static void TAG(transform_normals)( const GLmatrix *mat, GLfloat scale, const GLvector4f *in, @@ -292,7 +292,7 @@ } -static void _XFORMAPI +static void TAG(normalize_normals)( const GLmatrix *mat, GLfloat scale, const GLvector4f *in, @@ -338,7 +338,7 @@ } -static void _XFORMAPI +static void TAG(rescale_normals)( const GLmatrix *mat, GLfloat scale, const GLvector4f *in, @@ -361,7 +361,7 @@ } -static void _XFORMAPI +static void TAG(init_c_norm_transform)( void ) { _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] = diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/math/m_xform.h 
mesa-10.6.0~git20150318.27bf37ba/src/mesa/math/m_xform.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/math/m_xform.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/math/m_xform.h 2015-03-18 08:43:35.000000000 +0000 @@ -32,14 +32,6 @@ #include "math/m_matrix.h" #include "math/m_vector.h" -#ifdef USE_X86_ASM -#define _XFORMAPI _ASMAPI -#define _XFORMAPIP _ASMAPIP -#else -#define _XFORMAPI -#define _XFORMAPIP * -#endif - extern void _math_init_transformation(void); @@ -99,12 +91,12 @@ #define CLIP_FRUSTUM_BITS 0x3f -typedef GLvector4f * (_XFORMAPIP clip_func)( GLvector4f *vClip, - GLvector4f *vProj, - GLubyte clipMask[], - GLubyte *orMask, - GLubyte *andMask, - GLboolean viewport_z_clip ); +typedef GLvector4f * (*clip_func)(GLvector4f *vClip, + GLvector4f *vProj, + GLubyte clipMask[], + GLubyte *orMask, + GLubyte *andMask, + GLboolean viewport_z_clip); typedef void (*dotprod_func)( GLfloat *out, GLuint out_stride, @@ -119,11 +111,11 @@ /* * Functions for transformation of normals in the VB. */ -typedef void (_XFORMAPIP normal_func)( const GLmatrix *mat, - GLfloat scale, - const GLvector4f *in, - const GLfloat lengths[], - GLvector4f *dest ); +typedef void (*normal_func)(const GLmatrix *mat, + GLfloat scale, + const GLvector4f *in, + const GLfloat lengths[], + GLvector4f *dest); /* Flags for selecting a normal transformation function. @@ -141,9 +133,9 @@ * when the mask byte is zero. This is always present as a * parameter, to allow a unified interface. 
*/ -typedef void (_XFORMAPIP transform_func)( GLvector4f *to_vec, - const GLfloat m[16], - const GLvector4f *from_vec ); +typedef void (*transform_func)(GLvector4f *to_vec, + const GLfloat m[16], + const GLvector4f *from_vec); extern dotprod_func _mesa_dotprod_tab[5]; diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/math/m_xform_tmp.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/math/m_xform_tmp.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/math/m_xform_tmp.h 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/math/m_xform_tmp.h 2015-03-18 08:43:35.000000000 +0000 @@ -70,7 +70,7 @@ * driver-specific vertex format. */ -static void _XFORMAPI +static void TAG(transform_points1_general)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -96,7 +96,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points1_identity)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -116,7 +116,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points1_2d)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -138,7 +138,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points1_2d_no_rot)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -159,7 +159,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points1_3d)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -183,7 +183,7 @@ } -static void _XFORMAPI +static void TAG(transform_points1_3d_no_rot)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -206,7 +206,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points1_perspective)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -236,7 +236,7 @@ * present early in the geometry pipeline and throughout the * 
texture pipeline. */ -static void _XFORMAPI +static void TAG(transform_points2_general)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -262,7 +262,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points2_identity)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -283,7 +283,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points2_2d)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -305,7 +305,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points2_2d_no_rot)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -326,7 +326,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points2_3d)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -353,7 +353,7 @@ /* I would actually say this was a fairly important function, from * a texture transformation point of view. 
*/ -static void _XFORMAPI +static void TAG(transform_points2_3d_no_rot)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -382,7 +382,7 @@ } -static void _XFORMAPI +static void TAG(transform_points2_perspective)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -407,7 +407,7 @@ -static void _XFORMAPI +static void TAG(transform_points3_general)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -433,7 +433,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points3_identity)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -455,7 +455,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points3_2d)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -478,7 +478,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points3_2d_no_rot)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -500,7 +500,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points3_3d)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -526,7 +526,7 @@ /* previously known as ortho... 
*/ -static void _XFORMAPI +static void TAG(transform_points3_3d_no_rot)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -549,7 +549,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points3_perspective)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -575,7 +575,7 @@ -static void _XFORMAPI +static void TAG(transform_points4_general)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -601,7 +601,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points4_identity)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -624,7 +624,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points4_2d)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -648,7 +648,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points4_2d_no_rot)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -671,7 +671,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points4_3d)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -696,7 +696,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points4_3d_no_rot)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -720,7 +720,7 @@ to_vec->count = from_vec->count; } -static void _XFORMAPI +static void TAG(transform_points4_perspective)( GLvector4f *to_vec, const GLfloat m[16], const GLvector4f *from_vec ) @@ -753,7 +753,7 @@ * optimized routines overwriting the arrays. This only occurs during * startup. 
*/ -static void _XFORMAPI TAG(init_c_transformations)( void ) +static void TAG(init_c_transformations)( void ) { #define TAG_TAB _mesa_transform_tab #define TAG_TAB_1 TAG(transform_tab_1) diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/program/prog_instruction.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/program/prog_instruction.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/program/prog_instruction.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/program/prog_instruction.c 2015-03-18 08:43:35.000000000 +0000 @@ -89,7 +89,7 @@ memcpy(dest, src, n * sizeof(struct prog_instruction)); for (i = 0; i < n; i++) { if (src[i].Comment) - dest[i].Comment = _mesa_strdup(src[i].Comment); + dest[i].Comment = strdup(src[i].Comment); } return dest; } diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/program/prog_parameter.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/program/prog_parameter.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/program/prog_parameter.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/program/prog_parameter.c 2015-03-18 08:43:35.000000000 +0000 @@ -148,7 +148,7 @@ for (i = 0; i < sz4; i++) { struct gl_program_parameter *p = paramList->Parameters + oldNum + i; - p->Name = name ? _mesa_strdup(name) : NULL; + p->Name = name ? 
strdup(name) : NULL; p->Type = type; p->Size = size; p->DataType = datatype; diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/program/program.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/program/program.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/program/program.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/program/program.c 2015-03-18 08:43:35.000000000 +0000 @@ -79,7 +79,7 @@ STATIC_ASSERT(NUM_TEXTURE_TARGETS <= (1 << 4)); ctx->Program.ErrorPos = -1; - ctx->Program.ErrorString = _mesa_strdup(""); + ctx->Program.ErrorString = strdup(""); ctx->VertexProgram.Enabled = GL_FALSE; ctx->VertexProgram.PointSizeEnabled = @@ -176,7 +176,7 @@ free((void *) ctx->Program.ErrorString); if (!string) string = ""; - ctx->Program.ErrorString = _mesa_strdup(string); + ctx->Program.ErrorString = strdup(string); } @@ -483,7 +483,7 @@ assert(clone->Target == prog->Target); assert(clone->RefCount == 1); - clone->String = (GLubyte *) _mesa_strdup((char *) prog->String); + clone->String = (GLubyte *) strdup((char *) prog->String); clone->Format = prog->Format; clone->Instructions = _mesa_alloc_instructions(prog->NumInstructions); if (!clone->Instructions) { diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/program/prog_statevars.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/program/prog_statevars.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/program/prog_statevars.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/program/prog_statevars.c 2015-03-18 08:43:35.000000000 +0000 @@ -1045,7 +1045,7 @@ break; } - return _mesa_strdup(str); + return strdup(str); } diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/state_tracker/st_cb_bufferobjects.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/state_tracker/st_cb_bufferobjects.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/state_tracker/st_cb_bufferobjects.c 2015-03-10 16:52:50.000000000 +0000 +++ 
mesa-10.6.0~git20150318.27bf37ba/src/mesa/state_tracker/st_cb_bufferobjects.c 2015-03-18 08:43:35.000000000 +0000 @@ -485,8 +485,8 @@ static const char zeros[16] = {0}; if (!pipe->clear_buffer) { - _mesa_buffer_clear_subdata(ctx, offset, size, - clearValue, clearValueSize, bufObj); + _mesa_ClearBufferSubData_sw(ctx, offset, size, + clearValue, clearValueSize, bufObj); return; } diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/state_tracker/st_glsl_to_tgsi.cpp mesa-10.6.0~git20150318.27bf37ba/src/mesa/state_tracker/st_glsl_to_tgsi.cpp --- mesa-10.6.0~git20150310.5750595c/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 2015-03-18 08:43:35.000000000 +0000 @@ -332,6 +332,7 @@ int glsl_version; bool native_integers; bool have_sqrt; + bool have_fma; variable_storage *find_variable_storage(ir_variable *var); @@ -836,6 +837,7 @@ case3fid(ADD, UADD, DADD); case3fid(MUL, UMUL, DMUL); case3fid(MAD, UMAD, DMAD); + case3fid(FMA, UMAD, DFMA); case3(DIV, IDIV, UDIV); case4d(MAX, IMAX, UMAX, DMAX); case4d(MIN, IMIN, UMIN, DMIN); @@ -2222,10 +2224,11 @@ emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]); break; case ir_triop_fma: - /* NOTE: Perhaps there should be a special opcode that enforces fused - * mul-add. Just use MAD for now. - */ - emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); + /* In theory, MAD is incorrect here. 
*/ + if (have_fma) + emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]); + else + emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); break; case ir_unop_interpolate_at_centroid: emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); @@ -5564,6 +5567,8 @@ v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); + v->have_fma = pscreen->get_shader_param(pscreen, ptarget, + PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED); _mesa_copy_linked_program_data(shader->Stage, shader_program, prog); _mesa_generate_parameters_list_for_uniforms(shader_program, shader, diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/swrast/s_blend.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/swrast/s_blend.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/swrast/s_blend.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/swrast/s_blend.c 2015-03-18 08:43:35.000000000 +0000 @@ -48,9 +48,6 @@ #if defined(USE_MMX_ASM) #include "x86/mmx.h" #include "x86/common_x86_asm.h" -#define _BLENDAPI _ASMAPI -#else -#define _BLENDAPI #endif @@ -69,7 +66,7 @@ * No-op means the framebuffer values remain unchanged. * Any chanType ok. */ -static void _BLENDAPI +static void blend_noop(struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *src, const GLvoid *dst, GLenum chanType) { @@ -97,7 +94,7 @@ * Special case for glBlendFunc(GL_ONE, GL_ZERO) * Any chanType ok. */ -static void _BLENDAPI +static void blend_replace(struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *src, const GLvoid *dst, GLenum chanType) { @@ -117,7 +114,7 @@ * Common transparency blending mode: * glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA). 
*/ -static void _BLENDAPI +static void blend_transparency_ubyte(struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *src, const GLvoid *dst, GLenum chanType) { @@ -162,7 +159,7 @@ } -static void _BLENDAPI +static void blend_transparency_ushort(struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *src, const GLvoid *dst, GLenum chanType) { @@ -200,7 +197,7 @@ } -static void _BLENDAPI +static void blend_transparency_float(struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *src, const GLvoid *dst, GLenum chanType) { @@ -242,7 +239,7 @@ * Add src and dest: glBlendFunc(GL_ONE, GL_ONE). * Any chanType ok. */ -static void _BLENDAPI +static void blend_add(struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *src, const GLvoid *dst, GLenum chanType) { @@ -308,7 +305,7 @@ * Blend min function. * Any chanType ok. */ -static void _BLENDAPI +static void blend_min(struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *src, const GLvoid *dst, GLenum chanType) { @@ -361,7 +358,7 @@ * Blend max function. * Any chanType ok. */ -static void _BLENDAPI +static void blend_max(struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *src, const GLvoid *dst, GLenum chanType) { @@ -415,7 +412,7 @@ * Modulate: result = src * dest * Any chanType ok. */ -static void _BLENDAPI +static void blend_modulate(struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *src, const GLvoid *dst, GLenum chanType) { diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/swrast/s_context.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/swrast/s_context.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/swrast/s_context.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/swrast/s_context.c 2015-03-18 08:43:35.000000000 +0000 @@ -409,7 +409,7 @@ * Called via swrast->BlendFunc. Examine GL state to choose a blending * function, then call it. 
*/ -static void _ASMAPI +static void _swrast_validate_blend_func(struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *src, const GLvoid *dst, GLenum chanType ) diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/swrast/s_context.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/swrast/s_context.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/swrast/s_context.h 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/swrast/s_context.h 2015-03-18 08:43:35.000000000 +0000 @@ -58,10 +58,10 @@ GLuint n, const GLfloat texcoords[][4], const GLfloat lambda[], GLfloat rgba[][4]); -typedef void (_ASMAPIP blend_func)( struct gl_context *ctx, GLuint n, - const GLubyte mask[], - GLvoid *src, const GLvoid *dst, - GLenum chanType); +typedef void (*blend_func)(struct gl_context *ctx, GLuint n, + const GLubyte mask[], + GLvoid *src, const GLvoid *dst, + GLenum chanType); typedef void (*swrast_point_func)( struct gl_context *ctx, const SWvertex *); diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/tnl_dd/t_dd_triemit.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/tnl_dd/t_dd_triemit.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/tnl_dd/t_dd_triemit.h 2012-01-02 08:22:35.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/tnl_dd/t_dd_triemit.h 2015-03-18 08:43:35.000000000 +0000 @@ -16,13 +16,6 @@ "D" ((long)vb), \ "S" ((long)v) ); \ } while (0) -#elif defined(HAVE_LE32_VERTS) -#define COPY_DWORDS( j, vb, vertsize, v ) \ -do { \ - for ( j = 0 ; j < vertsize ; j++ ) \ - vb[j] = CPU_TO_LE32(((GLuint *)v)[j]); \ - vb += vertsize; \ -} while (0) #else #define COPY_DWORDS( j, vb, vertsize, v ) \ do { \ diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/vbo/vbo_attrib_tmp.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/vbo/vbo_attrib_tmp.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/vbo/vbo_attrib_tmp.h 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/vbo/vbo_attrib_tmp.h 2015-03-18 08:43:35.000000000 +0000 @@ -30,35 
+30,30 @@ /* ATTR */ -#define ATTR( A, N, T, V0, V1, V2, V3 ) \ - ATTR_##T((A), (N), (T), (V0), (V1), (V2), (V3)) - -#define ATTR_GL_UNSIGNED_INT( A, N, T, V0, V1, V2, V3 ) \ - ATTR_UNION(A, N, T, UINT_AS_UNION(V0), UINT_AS_UNION(V1), \ - UINT_AS_UNION(V2), UINT_AS_UNION(V3)) -#define ATTR_GL_INT( A, N, T, V0, V1, V2, V3 ) \ - ATTR_UNION(A, N, T, INT_AS_UNION(V0), INT_AS_UNION(V1), \ +#define ATTRI( A, N, V0, V1, V2, V3 ) \ + ATTR_UNION(A, N, GL_INT, INT_AS_UNION(V0), INT_AS_UNION(V1), \ INT_AS_UNION(V2), INT_AS_UNION(V3)) -#define ATTR_GL_FLOAT( A, N, T, V0, V1, V2, V3 ) \ - ATTR_UNION(A, N, T, FLOAT_AS_UNION(V0), FLOAT_AS_UNION(V1),\ +#define ATTRUI( A, N, V0, V1, V2, V3 ) \ + ATTR_UNION(A, N, GL_UNSIGNED_INT, UINT_AS_UNION(V0), UINT_AS_UNION(V1), \ + UINT_AS_UNION(V2), UINT_AS_UNION(V3)) +#define ATTRF( A, N, V0, V1, V2, V3 ) \ + ATTR_UNION(A, N, GL_FLOAT, FLOAT_AS_UNION(V0), FLOAT_AS_UNION(V1),\ FLOAT_AS_UNION(V2), FLOAT_AS_UNION(V3)) /* float */ -#define ATTR1FV( A, V ) ATTR( A, 1, GL_FLOAT, (V)[0], 0, 0, 1 ) -#define ATTR2FV( A, V ) ATTR( A, 2, GL_FLOAT, (V)[0], (V)[1], 0, 1 ) -#define ATTR3FV( A, V ) ATTR( A, 3, GL_FLOAT, (V)[0], (V)[1], (V)[2], 1 ) -#define ATTR4FV( A, V ) ATTR( A, 4, GL_FLOAT, (V)[0], (V)[1], (V)[2], (V)[3] ) - -#define ATTR1F( A, X ) ATTR( A, 1, GL_FLOAT, X, 0, 0, 1 ) -#define ATTR2F( A, X, Y ) ATTR( A, 2, GL_FLOAT, X, Y, 0, 1 ) -#define ATTR3F( A, X, Y, Z ) ATTR( A, 3, GL_FLOAT, X, Y, Z, 1 ) -#define ATTR4F( A, X, Y, Z, W ) ATTR( A, 4, GL_FLOAT, X, Y, Z, W ) +#define ATTR1FV( A, V ) ATTRF( A, 1, (V)[0], 0, 0, 1 ) +#define ATTR2FV( A, V ) ATTRF( A, 2, (V)[0], (V)[1], 0, 1 ) +#define ATTR3FV( A, V ) ATTRF( A, 3, (V)[0], (V)[1], (V)[2], 1 ) +#define ATTR4FV( A, V ) ATTRF( A, 4, (V)[0], (V)[1], (V)[2], (V)[3] ) + +#define ATTR1F( A, X ) ATTRF( A, 1, X, 0, 0, 1 ) +#define ATTR2F( A, X, Y ) ATTRF( A, 2, X, Y, 0, 1 ) +#define ATTR3F( A, X, Y, Z ) ATTRF( A, 3, X, Y, Z, 1 ) +#define ATTR4F( A, X, Y, Z, W ) ATTRF( A, 4, X, Y, Z, W ) -/* int */ 
-#define ATTRI( A, N, X, Y, Z, W) ATTR( A, N, GL_INT, \ - X, Y, Z, W ) +/* int */ #define ATTR2IV( A, V ) ATTRI( A, 2, (V)[0], (V)[1], 0, 1 ) #define ATTR3IV( A, V ) ATTRI( A, 3, (V)[0], (V)[1], (V)[2], 1 ) #define ATTR4IV( A, V ) ATTRI( A, 4, (V)[0], (V)[1], (V)[2], (V)[3] ) @@ -70,9 +65,6 @@ /* uint */ -#define ATTRUI( A, N, X, Y, Z, W) ATTR( A, N, GL_UNSIGNED_INT, \ - X, Y, Z, W ) - #define ATTR2UIV( A, V ) ATTRUI( A, 2, (V)[0], (V)[1], 0, 1 ) #define ATTR3UIV( A, V ) ATTRUI( A, 3, (V)[0], (V)[1], (V)[2], 1 ) #define ATTR4UIV( A, V ) ATTRUI( A, 4, (V)[0], (V)[1], (V)[2], (V)[3] ) @@ -82,7 +74,7 @@ #define ATTR3UI( A, X, Y, Z ) ATTRUI( A, 3, X, Y, Z, 1 ) #define ATTR4UI( A, X, Y, Z, W ) ATTRUI( A, 4, X, Y, Z, W ) -#define MAT_ATTR( A, N, V ) ATTR( A, N, GL_FLOAT, (V)[0], (V)[1], (V)[2], (V)[3] ) +#define MAT_ATTR( A, N, V ) ATTRF( A, N, (V)[0], (V)[1], (V)[2], (V)[3] ) static inline float conv_ui10_to_norm_float(unsigned ui10) { @@ -94,20 +86,20 @@ return ui2 / 3.0f; } -#define ATTRUI10_1( A, UI ) ATTR( A, 1, GL_FLOAT, (UI) & 0x3ff, 0, 0, 1 ) -#define ATTRUI10_2( A, UI ) ATTR( A, 2, GL_FLOAT, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, 0, 1 ) -#define ATTRUI10_3( A, UI ) ATTR( A, 3, GL_FLOAT, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, ((UI) >> 20) & 0x3ff, 1 ) -#define ATTRUI10_4( A, UI ) ATTR( A, 4, GL_FLOAT, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, ((UI) >> 20) & 0x3ff, ((UI) >> 30) & 0x3 ) +#define ATTRUI10_1( A, UI ) ATTRF( A, 1, (UI) & 0x3ff, 0, 0, 1 ) +#define ATTRUI10_2( A, UI ) ATTRF( A, 2, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, 0, 1 ) +#define ATTRUI10_3( A, UI ) ATTRF( A, 3, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, ((UI) >> 20) & 0x3ff, 1 ) +#define ATTRUI10_4( A, UI ) ATTRF( A, 4, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, ((UI) >> 20) & 0x3ff, ((UI) >> 30) & 0x3 ) -#define ATTRUI10N_1( A, UI ) ATTR( A, 1, GL_FLOAT, conv_ui10_to_norm_float((UI) & 0x3ff), 0, 0, 1 ) -#define ATTRUI10N_2( A, UI ) ATTR( A, 2, GL_FLOAT, \ +#define ATTRUI10N_1( A, UI ) ATTRF( A, 1, 
conv_ui10_to_norm_float((UI) & 0x3ff), 0, 0, 1 ) +#define ATTRUI10N_2( A, UI ) ATTRF( A, 2, \ conv_ui10_to_norm_float((UI) & 0x3ff), \ conv_ui10_to_norm_float(((UI) >> 10) & 0x3ff), 0, 1 ) -#define ATTRUI10N_3( A, UI ) ATTR( A, 3, GL_FLOAT, \ +#define ATTRUI10N_3( A, UI ) ATTRF( A, 3, \ conv_ui10_to_norm_float((UI) & 0x3ff), \ conv_ui10_to_norm_float(((UI) >> 10) & 0x3ff), \ conv_ui10_to_norm_float(((UI) >> 20) & 0x3ff), 1 ) -#define ATTRUI10N_4( A, UI ) ATTR( A, 4, GL_FLOAT, \ +#define ATTRUI10N_4( A, UI ) ATTRF( A, 4, \ conv_ui10_to_norm_float((UI) & 0x3ff), \ conv_ui10_to_norm_float(((UI) >> 10) & 0x3ff), \ conv_ui10_to_norm_float(((UI) >> 20) & 0x3ff), \ @@ -180,30 +172,30 @@ } } -#define ATTRI10_1( A, I10 ) ATTR( A, 1, GL_FLOAT, conv_i10_to_i((I10) & 0x3ff), 0, 0, 1 ) -#define ATTRI10_2( A, I10 ) ATTR( A, 2, GL_FLOAT, \ +#define ATTRI10_1( A, I10 ) ATTRF( A, 1, conv_i10_to_i((I10) & 0x3ff), 0, 0, 1 ) +#define ATTRI10_2( A, I10 ) ATTRF( A, 2, \ conv_i10_to_i((I10) & 0x3ff), \ conv_i10_to_i(((I10) >> 10) & 0x3ff), 0, 1 ) -#define ATTRI10_3( A, I10 ) ATTR( A, 3, GL_FLOAT, \ +#define ATTRI10_3( A, I10 ) ATTRF( A, 3, \ conv_i10_to_i((I10) & 0x3ff), \ conv_i10_to_i(((I10) >> 10) & 0x3ff), \ conv_i10_to_i(((I10) >> 20) & 0x3ff), 1 ) -#define ATTRI10_4( A, I10 ) ATTR( A, 4, GL_FLOAT, \ +#define ATTRI10_4( A, I10 ) ATTRF( A, 4, \ conv_i10_to_i((I10) & 0x3ff), \ conv_i10_to_i(((I10) >> 10) & 0x3ff), \ conv_i10_to_i(((I10) >> 20) & 0x3ff), \ conv_i2_to_i(((I10) >> 30) & 0x3)) -#define ATTRI10N_1(ctx, A, I10) ATTR(A, 1, GL_FLOAT, conv_i10_to_norm_float(ctx, (I10) & 0x3ff), 0, 0, 1 ) -#define ATTRI10N_2(ctx, A, I10) ATTR(A, 2, GL_FLOAT, \ +#define ATTRI10N_1(ctx, A, I10) ATTRF(A, 1, conv_i10_to_norm_float(ctx, (I10) & 0x3ff), 0, 0, 1 ) +#define ATTRI10N_2(ctx, A, I10) ATTRF(A, 2, \ conv_i10_to_norm_float(ctx, (I10) & 0x3ff), \ conv_i10_to_norm_float(ctx, ((I10) >> 10) & 0x3ff), 0, 1 ) -#define ATTRI10N_3(ctx, A, I10) ATTR(A, 3, GL_FLOAT, \ +#define ATTRI10N_3(ctx, A, I10) 
ATTRF(A, 3, \ conv_i10_to_norm_float(ctx, (I10) & 0x3ff), \ conv_i10_to_norm_float(ctx, ((I10) >> 10) & 0x3ff), \ conv_i10_to_norm_float(ctx, ((I10) >> 20) & 0x3ff), 1 ) -#define ATTRI10N_4(ctx, A, I10) ATTR(A, 4, GL_FLOAT, \ +#define ATTRI10N_4(ctx, A, I10) ATTRF(A, 4, \ conv_i10_to_norm_float(ctx, (I10) & 0x3ff), \ conv_i10_to_norm_float(ctx, ((I10) >> 10) & 0x3ff), \ conv_i10_to_norm_float(ctx, ((I10) >> 20) & 0x3ff), \ diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/vbo/vbo_exec_api.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/vbo/vbo_exec_api.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/vbo/vbo_exec_api.c 2015-03-10 16:52:50.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/vbo/vbo_exec_api.c 2015-03-18 08:43:35.000000000 +0000 @@ -1239,7 +1239,7 @@ { GET_CURRENT_CONTEXT(ctx); if (index < MAX_VERTEX_GENERIC_ATTRIBS) - ATTR(VBO_ATTRIB_GENERIC0 + index, 4, GL_FLOAT, x, y, z, w); + ATTRF(VBO_ATTRIB_GENERIC0 + index, 4, x, y, z, w); else ERROR(GL_INVALID_VALUE); } diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/x86/3dnow.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/3dnow.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/x86/3dnow.c 2015-02-25 15:01:21.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/3dnow.c 2015-03-18 08:43:35.000000000 +0000 @@ -47,20 +47,20 @@ DECLARE_XFORM_GROUP( 3dnow, 4 ) -extern void _ASMAPI +extern void _mesa_v16_3dnow_general_xform( GLfloat *first_vert, const GLfloat *m, const GLfloat *src, GLuint src_stride, GLuint count ); -extern void _ASMAPI +extern void _mesa_3dnow_project_vertices( GLfloat *first, GLfloat *last, const GLfloat *m, GLuint stride ); -extern void _ASMAPI +extern void _mesa_3dnow_project_clipped_vertices( GLfloat *first, GLfloat *last, const GLfloat *m, diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/x86/common_x86.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/common_x86.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/x86/common_x86.c 2015-03-10 16:52:50.000000000 +0000 +++ 
mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/common_x86.c 2015-03-18 08:43:35.000000000 +0000 @@ -68,12 +68,12 @@ /* No reason for this to be public. */ -extern GLuint _ASMAPI _mesa_x86_has_cpuid(void); -extern void _ASMAPI _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx); -extern GLuint _ASMAPI _mesa_x86_cpuid_eax(GLuint op); -extern GLuint _ASMAPI _mesa_x86_cpuid_ebx(GLuint op); -extern GLuint _ASMAPI _mesa_x86_cpuid_ecx(GLuint op); -extern GLuint _ASMAPI _mesa_x86_cpuid_edx(GLuint op); +extern GLuint _mesa_x86_has_cpuid(void); +extern void _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx); +extern GLuint _mesa_x86_cpuid_eax(GLuint op); +extern GLuint _mesa_x86_cpuid_ebx(GLuint op); +extern GLuint _mesa_x86_cpuid_ecx(GLuint op); +extern GLuint _mesa_x86_cpuid_edx(GLuint op); #if defined(USE_SSE_ASM) diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/x86/mmx.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/mmx.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/x86/mmx.h 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/mmx.h 2015-03-18 08:43:35.000000000 +0000 @@ -31,27 +31,27 @@ struct gl_context; -extern void _ASMAPI +extern void _mesa_mmx_blend_transparency( struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *rgba, const GLvoid *dest, GLenum chanType ); -extern void _ASMAPI +extern void _mesa_mmx_blend_add( struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *rgba, const GLvoid *dest, GLenum chanType ); -extern void _ASMAPI +extern void _mesa_mmx_blend_min( struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *rgba, const GLvoid *dest, GLenum chanType ); -extern void _ASMAPI +extern void _mesa_mmx_blend_max( struct gl_context *ctx, GLuint n, const GLubyte mask[], GLvoid *rgba, const GLvoid *dest, GLenum chanType ); -extern void _ASMAPI +extern void _mesa_mmx_blend_modulate( struct gl_context *ctx, GLuint 
n, const GLubyte mask[], GLvoid *rgba, const GLvoid *dest, GLenum chanType ); diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/x86/sse.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/sse.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/x86/sse.c 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/sse.c 2015-03-18 08:43:35.000000000 +0000 @@ -46,35 +46,35 @@ #if 1 /* Some functions are not written in SSE-assembly, because the fpu ones are faster */ -extern void _ASMAPI _mesa_sse_transform_normals_no_rot( NORM_ARGS ); -extern void _ASMAPI _mesa_sse_transform_rescale_normals( NORM_ARGS ); -extern void _ASMAPI _mesa_sse_transform_rescale_normals_no_rot( NORM_ARGS ); +extern void _mesa_sse_transform_normals_no_rot( NORM_ARGS ); +extern void _mesa_sse_transform_rescale_normals( NORM_ARGS ); +extern void _mesa_sse_transform_rescale_normals_no_rot( NORM_ARGS ); -extern void _ASMAPI _mesa_sse_transform_points4_general( XFORM_ARGS ); -extern void _ASMAPI _mesa_sse_transform_points4_3d( XFORM_ARGS ); +extern void _mesa_sse_transform_points4_general( XFORM_ARGS ); +extern void _mesa_sse_transform_points4_3d( XFORM_ARGS ); /* XXX this function segfaults, see below */ -extern void _ASMAPI _mesa_sse_transform_points4_identity( XFORM_ARGS ); +extern void _mesa_sse_transform_points4_identity( XFORM_ARGS ); /* XXX this one works, see below */ -extern void _ASMAPI _mesa_x86_transform_points4_identity( XFORM_ARGS ); +extern void _mesa_x86_transform_points4_identity( XFORM_ARGS ); #else DECLARE_NORM_GROUP( sse ) #endif -extern void _ASMAPI +extern void _mesa_v16_sse_general_xform( GLfloat *first_vert, const GLfloat *m, const GLfloat *src, GLuint src_stride, GLuint count ); -extern void _ASMAPI +extern void _mesa_sse_project_vertices( GLfloat *first, GLfloat *last, const GLfloat *m, GLuint stride ); -extern void _ASMAPI +extern void _mesa_sse_project_clipped_vertices( GLfloat *first, GLfloat *last, const GLfloat *m, diff -Nru 
mesa-10.6.0~git20150310.5750595c/src/mesa/x86/x86_xform.c mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/x86_xform.c --- mesa-10.6.0~git20150310.5750595c/src/mesa/x86/x86_xform.c 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/x86_xform.c 2015-03-18 08:43:35.000000000 +0000 @@ -54,7 +54,7 @@ DECLARE_XFORM_GROUP( x86, 4 ) -extern GLvector4f * _ASMAPI +extern GLvector4f * _mesa_x86_cliptest_points4( GLvector4f *clip_vec, GLvector4f *proj_vec, GLubyte clipMask[], @@ -62,7 +62,7 @@ GLubyte *andMask, GLboolean viewport_z_clip ); -extern GLvector4f * _ASMAPI +extern GLvector4f * _mesa_x86_cliptest_points4_np( GLvector4f *clip_vec, GLvector4f *proj_vec, GLubyte clipMask[], @@ -70,7 +70,7 @@ GLubyte *andMask, GLboolean viewport_z_clip ); -extern void _ASMAPI +extern void _mesa_v16_x86_cliptest_points4( GLfloat *first_vert, GLfloat *last_vert, GLubyte *or_mask, @@ -78,7 +78,7 @@ GLubyte *clip_mask, GLboolean viewport_z_clip ); -extern void _ASMAPI +extern void _mesa_v16_x86_general_xform( GLfloat *dest, const GLfloat *m, const GLfloat *src, diff -Nru mesa-10.6.0~git20150310.5750595c/src/mesa/x86/x86_xform.h mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/x86_xform.h --- mesa-10.6.0~git20150310.5750595c/src/mesa/x86/x86_xform.h 2014-04-20 07:52:19.000000000 +0000 +++ mesa-10.6.0~git20150318.27bf37ba/src/mesa/x86/x86_xform.h 2015-03-18 08:43:35.000000000 +0000 @@ -39,13 +39,13 @@ const GLvector4f *from_vec #define DECLARE_XFORM_GROUP( pfx, sz ) \ -extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS ); \ 
-extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS ); +extern void _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS ); \ +extern void _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS ); \ +extern void _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS ); \ +extern void _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS ); \ +extern void _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS ); \ +extern void _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS ); \ +extern void _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS ); #define ASSIGN_XFORM_GROUP( pfx, sz ) \ _mesa_transform_tab[sz][MATRIX_GENERAL] = \ @@ -75,14 +75,14 @@ GLvector4f *dest #define DECLARE_NORM_GROUP( pfx ) \ -extern void _ASMAPI _mesa_##pfx##_rescale_normals( NORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_normalize_normals( NORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_transform_normals( NORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_transform_normals_no_rot( NORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals( NORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_no_rot( NORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals( NORM_ARGS ); \ -extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_no_rot( NORM_ARGS ); +extern void _mesa_##pfx##_rescale_normals( NORM_ARGS ); \ +extern void _mesa_##pfx##_normalize_normals( NORM_ARGS ); \ +extern void _mesa_##pfx##_transform_normals( NORM_ARGS ); \ +extern void _mesa_##pfx##_transform_normals_no_rot( NORM_ARGS ); \ +extern void _mesa_##pfx##_transform_rescale_normals( NORM_ARGS ); \ +extern void _mesa_##pfx##_transform_rescale_normals_no_rot( NORM_ARGS ); \ +extern void _mesa_##pfx##_transform_normalize_normals( NORM_ARGS ); \ +extern void _mesa_##pfx##_transform_normalize_normals_no_rot( NORM_ARGS ); #define ASSIGN_NORM_GROUP( pfx ) \ _mesa_normal_tab[NORM_RESCALE] = \