diff -Nru mesa-20.2.1/bin/gen_release_notes.py mesa-20.2.6/bin/gen_release_notes.py --- mesa-20.2.1/bin/gen_release_notes.py 2020-10-14 17:19:10.226515500 +0000 +++ mesa-20.2.6/bin/gen_release_notes.py 2020-12-16 21:42:03.478109800 +0000 @@ -113,24 +113,25 @@ commits = await gather_commits(version) issues: typing.List[str] = [] - for commit in commits.split('\n'): - sha, message = commit.split(maxsplit=1) - p = await asyncio.create_subprocess_exec( - 'git', 'log', '--max-count', '1', r'--format=%b', sha, - stdout=asyncio.subprocess.PIPE) - _out, _ = await p.communicate() - out = _out.decode().split('\n') - for line in reversed(out): - if line.startswith('Closes:'): - bug = line.lstrip('Closes:').strip() - break - else: - raise Exception('No closes found?') - if bug.startswith('h'): - # This means we have a bug in the form "Closes: https://..." - issues.append(os.path.basename(urllib.parse.urlparse(bug).path)) - else: - issues.append(bug.lstrip('#')) + if commits: + for commit in commits.split('\n'): + sha, message = commit.split(maxsplit=1) + p = await asyncio.create_subprocess_exec( + 'git', 'log', '--max-count', '1', r'--format=%b', sha, + stdout=asyncio.subprocess.PIPE) + _out, _ = await p.communicate() + out = _out.decode().split('\n') + for line in reversed(out): + if line.startswith('Closes:'): + bug = line.lstrip('Closes:').strip() + break + else: + raise Exception('No closes found?') + if bug.startswith('h'): + # This means we have a bug in the form "Closes: https://..." 
+ issues.append(os.path.basename(urllib.parse.urlparse(bug).path)) + else: + issues.append(bug.lstrip('#')) loop = asyncio.get_event_loop() async with aiohttp.ClientSession(loop=loop) as session: diff -Nru mesa-20.2.1/bin/pick-ui.py mesa-20.2.6/bin/pick-ui.py --- mesa-20.2.1/bin/pick-ui.py 2020-10-14 17:19:10.226515500 +0000 +++ mesa-20.2.6/bin/pick-ui.py 2020-12-16 21:42:03.479110000 +0000 @@ -28,6 +28,6 @@ if __name__ == "__main__": u = UI() evl = urwid.AsyncioEventLoop(loop=asyncio.get_event_loop()) - loop = urwid.MainLoop(u.render(), PALETTE, event_loop=evl) + loop = urwid.MainLoop(u.render(), PALETTE, event_loop=evl, handle_mouse=False) u.mainloop = loop loop.run() diff -Nru mesa-20.2.1/debian/changelog mesa-20.2.6/debian/changelog --- mesa-20.2.1/debian/changelog 2020-12-18 09:24:13.000000000 +0000 +++ mesa-20.2.6/debian/changelog 2020-12-18 09:24:15.000000000 +0000 @@ -1,3 +1,9 @@ +mesa (20.2.6-0ubuntu0.20.04.1) focal; urgency=medium + + * New upstream bugfix release. (LP: #1908699) + + -- Timo Aaltonen Fri, 18 Dec 2020 10:35:45 +0200 + mesa (20.2.1-1~ubuntu0.20.04.2) focal; urgency=medium * ppc64el-ftbfs-fix-type-collisions.diff: Fix build failure on ppc64el. diff -Nru mesa-20.2.1/docs/relnotes/20.2.1.rst mesa-20.2.6/docs/relnotes/20.2.1.rst --- mesa-20.2.1/docs/relnotes/20.2.1.rst 2020-10-14 17:19:10.249849000 +0000 +++ mesa-20.2.6/docs/relnotes/20.2.1.rst 2020-12-16 21:42:03.500110000 +0000 @@ -19,7 +19,7 @@ :: - TBD. + d1a46d9a3f291bc0e0374600bdcb59844fa3eafaa50398e472a36fc65fd0244a mesa-20.2.1.tar.xz New features diff -Nru mesa-20.2.1/docs/relnotes/20.2.2.rst mesa-20.2.6/docs/relnotes/20.2.2.rst --- mesa-20.2.1/docs/relnotes/20.2.2.rst 1970-01-01 00:00:00.000000000 +0000 +++ mesa-20.2.6/docs/relnotes/20.2.2.rst 2020-12-16 21:42:03.500110000 +0000 @@ -0,0 +1,147 @@ +Mesa 20.2.2 Release Notes / 2020-11-06 +====================================== + +Mesa 20.2.2 is a bug fix release which fixes bugs found since the 20.2.1 release. 
+ +Mesa 20.2.2 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is **only** available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 20.2.2 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + 1f93eb1090cf71490cd0e204e04f8427a82b6ed534b7f49ca50cea7dcc89b861 mesa-20.2.2.tar.xz + + +New features +------------ + +- None + + +Bug fixes +--------- + +- anv: dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d* failures +- anv: dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d* failures +- radv/aco: Vertex explosion on RPCS3 +- Gnome 3.38 with Xwayland has screen corruption for X11 apps. +- RADV: Death Stranding glitchy sky rendering +- Crash in glDrawArrays on Intel iris +- deinterlace_vaapi=rate=field does not double output's actual frame rate on AMD +- Steam game Haydee leans on implementation-dependent behavior +- vc4 in 20.2-rc has regression causing app to crash +- [RADV/ACO] Star Citizen Lighting/Shadow Issue + + +Changes +------- + +Bas Nieuwenhuizen (3): + +- radv: Fix 1D compressed mipmaps on GFX9. +- radv: Do not access set layout during vkCmdBindDescriptorSets. +- radv: Fix variable name collision. + +Dave Airlie (1): + +- gallivm: zero init the temporary register storage. 
+ +Dylan Baker (9): + +- docs: add SHA256 sums for 20.2.1 +- .pick_status.json: Update to f29c81f863c9879a6a87724cbdae1e1818f3f6b4 +- .pick_status.json: Update to aea74eac3d7706ed8d870504b163356e3f104a4c +- .pick_status.json: Update to 7c5129985bcac75053823a31674e8a1e2629230c +- .pick_status.json: Update to 3c87ac1f60875b5bbd4facca22fc426ee747997a +- .pick_status.json: Update to d0f8fe5909107aa342f62813ced9ce535ed6da32 +- .pick_status.json: Update to 025050bae73d0598d788e3c307328670a3bf51c1 +- .pick_status.json: Update to b92eadb29cc8ef09096d9196434d49e35a3eccaf +- .pick_status.json: Update to 8077f3f4c4a3d8007caa30eed93fed1c6bbf3c5a + +Jose Maria Casanova Crespo (2): + +- vc4: Add missing load_ubo set_align in yuv_blit fs. +- vc4: Enable nir_lower_io for uniforms + +Lionel Landwerlin (3): + +- intel/dev: Bump Max EU per subslice/dualsubslice +- anv: fix source/destination layers for 3D blits +- blorp: allow blits with floating point source layers + +Lucas Stach (2): + +- etnaviv: drm: fix BO refcount race +- etnaviv: blt: properly program surface TS offset for clears + +Marcin Ślusarz (2): + +- vulkan/wsi: fix possible random stalls in wsi_display_wait_for_event +- intel/tools: fix invalid type in argument to printf + +Marek Olšák (2): + +- Revert "radeonsi/gfx10: disable vertex grouping" +- winsys/amdgpu: remove incorrect assertion check against max_check_space_size + +Michael Tretter (1): + +- etnaviv: free tgsi tokens when shader state is deleted + +Michel Dänzer (3): + +- loader/dri3: Only allocate additional buffers if needed +- loader/dri3: Keep current number of back buffers if frame was skipped +- loader/dri3: Allocate up to 4 back buffers for page flips + +Nanley Chery (3): + +- st/mesa: Add missing sentinels in format_map[] +- intel/isl: Drop redundant unpack of unorm channels +- isl: Fix the aux-map encoding for D24_UNORM_X8 + +Rhys Perry (4): + +- nir/opt_load_store_vectorize: don't vectorize stores across demote +- aco: add missing SCC clobber in 
get_buffer_size +- aco: update phi_map in add_subdword_operand() +- aco: ignore the ACO-inserted continue in create_continue_phis() + +Rob Clark (1): + +- freedreno: Disallow tiled if SHARED and not QCOM_COMPRESSED + +Ryan Neph (1): + +- virgl: Fixes portal2 binary name in tweak config + +Samuel Pitoiset (1): + +- aco: fix determining if LOD is zero for nir_texop_txf/nir_texop_txs + +Tapani Pälli (2): + +- gallivm/nir: handle nir_op_flt in lp_build_nir_llvm +- iris: fix the order of src and dst for fence memcpy + +Thong Thai (1): + +- frontends/va/postproc: Un-break field flag + +Timothy Arceri (1): + +- glsl: relax rule on varying matching for shaders older than 4.00 + +Tony Wasserka (1): + +- aco/isel: Always export position data from VS/NGG diff -Nru mesa-20.2.1/docs/relnotes/20.2.3.rst mesa-20.2.6/docs/relnotes/20.2.3.rst --- mesa-20.2.1/docs/relnotes/20.2.3.rst 1970-01-01 00:00:00.000000000 +0000 +++ mesa-20.2.6/docs/relnotes/20.2.3.rst 2020-12-16 21:42:03.500110000 +0000 @@ -0,0 +1,184 @@ +Mesa 20.2.3 Release Notes / 2020-11-23 +====================================== + +Mesa 20.2.3 is a bug fix release which fixes bugs found since the 20.2.2 release. + +Mesa 20.2.3 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is **only** available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 20.2.3 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. 
+ +SHA256 checksum +--------------- + +:: + + ae1b240e11531df528d14dc214d2dc4d2b4f2e835c6230ba0b492b171eceb82b mesa-20.2.3.tar.xz + + +New features +------------ + +- None + + +Bug fixes +--------- + +- glcpp test 084-unbalanced-parentheses fails with bison 3.6.y +- iris: glClear with FBO imported from DMA-BUF doesn't work +- [spirv-fuzz] Shader causes an assertion failure in nir_opt_large_constants +- Regnum Online UBO break after game update +- Random corruption in KDE with Mesa 20.2 in VMware + + +Changes +------- + +Bas Nieuwenhuizen (1): + +- radv: Add ETC2 support on RAVEN2. + +Brendan Dougherty (1): + +- mesa: Fix vertex_format_to_pipe_format index. + +Chad Versace (2): + +- anv/image: Check DISJOINT in vkGetPhysicalDeviceImageFormatProperties2 (v2) +- anv/image: Fix isl_surf_usage_flags for stencil images + +Christian Gmeiner (1): + +- etnaviv: nir: do not run opt loop after nir_lower_bool_xxx(..) + +Dave Airlie (1): + +- draw: fix tess eval pipeline statistics. + +Dylan Baker (6): + +- dcs: Add sha256 sums for 20.2.2 +- .pick_status.json: Update to bf5cea7232f9ee2934c212211ebefb6fe766526d +- .pick_status.json: Update to 87dc3106b077199b829a082e32ec33d0c6d400ab +- .pick_status.json: Mark 87934f02f9da94f1a493096049c229b973e4785c as backported +- .pick_status.json: Mark ea326912575fad09af59486ad62d126c4ea0ede7 as backported +- .pick_status.json: Update to bac6cc586fe4c1b24351e0574d3a961eb631f6ae + +Eric Anholt (3): + +- freedreno/cffdec: When .mergedregs is set, don't count half regs. +- util/set: Fix the _mesa_set_clear function to not leave tombstones. +- freedreno: Fix leak of shader binary on disk cache hits. + +Erik Faye-Lund (2): + +- gallium/util: do not pass undefined sample-count +- mesa/main: add missing include in glformats.h + +Gert Wollny (1): + +- r600: revert disabling llvm draw + +Igor V. 
Kovalenko (1): + +- r600: amend space check for chips older than EVERGREEN + +Jason Ekstrand (4): + +- mesa/spirv: Lower variable initializers for global variables +- nir: Handle incomplete derefs in split_struct_vars +- nir/opt_intrinsics: Report progress for the gl_SampleMask optimization +- intel/fs: Fix use of undefined value in fixup_nomask_control_flow + +Kenneth Graunke (2): + +- iris: fix source/destination layers for 3D blits +- intel/fs: Fix sampler message headers on Gen11+ when using scratch + +Krunal Patel (2): + +- gallium/auxiliary/vl: Odd Dimensions are failing +- radeon/vcn: Bitrate not updated when changing framerate + +Louis Li (1): + +- radeon/radeon_vce: fix out of target bitrate in CBR mode (H.264) + +Marcin Ślusarz (2): + +- anv: always annotate memory returned from anv_gem_mmap +- nir: handle float atomics in copy propagation pass + +Marek Olšák (3): + +- radeonsi: do VGT_FLUSH when switching NGG -> legacy on Sienna Cichlid +- radeonsi: only do VGT_FLUSH for fast launch if previous draw was normal launch +- radeonsi: fix scan_instruction for bindless inc_wrap/dec_wrap atomics + +Matt Turner (1): + +- glcpp: Handle bison-3.6 error message changes + +Michel Dänzer (2): + +- i965/bufmgr: Handle NULL bufmgr in brw_bufmgr_get_for_fd +- iris/bufmgr: Handle NULL bufmgr in iris_bufmgr_get_for_fd + +Nanley Chery (4): + +- iris: Flush dmabufs during context flushes +- mesa: Add and use _mesa_has_depth_float_channel +- mesa: Clamp some depth values in glClearBufferfv +- mesa: Clamp some depth values in glClearBufferfi + +Raven (1): + +- winsys/svga: fix display corruption after surface_init + +Rhys Perry (4): + +- radv: fix shader caching with discard->demote workaround +- radv: fix shader caching with NaN fixup workaround +- nir: add nir_alu_src_is_trivial_ssa() +- nir: skip bcsel with non-trivial swizzle in opt_simplify_bcsel_of_phi() + +Rob Clark (2): + +- freedreno: Protect gmem_cache ralloc allocations +- freedreno/ir3: Fix crash in shader compile 
fail path + +Tapani Pälli (3): + +- mesa/st: call memobj_destroy only if there is memory imported +- mesa: do not throw _mesa_problem when invalid enum is used +- egl/dri2: fix race between image create and egl_image_target_texture + +Timothy Arceri (1): + +- glsl: add extra pp tokens workaround and enable for CoR + +Tony Wasserka (1): + +- aco/ra: Fix counting of subdword variables in get_reg_create_vector + +Vinson Lee (4): + +- os: Fix open result check. +- amd/addrlib: Add missing va_end. +- frontends/va: Fix *num_entrypoints check. +- vdpau: Add missing printf format specifier. + +Woody Chow (1): + +- st/mesa: Fix EGLImageTargetTexture2D for GL_TEXTURE_2D + +Yevhenii Kolesnikov (1): + +- nir/large_constants: only search for constant duplicates diff -Nru mesa-20.2.1/docs/relnotes/20.2.4.rst mesa-20.2.6/docs/relnotes/20.2.4.rst --- mesa-20.2.1/docs/relnotes/20.2.4.rst 1970-01-01 00:00:00.000000000 +0000 +++ mesa-20.2.6/docs/relnotes/20.2.4.rst 2020-12-16 21:42:03.500110000 +0000 @@ -0,0 +1,135 @@ +Mesa 20.2.4 Release Notes / 2020-12-04 +====================================== + +Mesa 20.2.4 is a bug fix release which fixes bugs found since the 20.2.3 release. + +Mesa 20.2.4 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is **only** available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 20.2.4 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. 
+ +SHA256 checksum +--------------- + +:: + + 0572dc6015d2e1c50f67823edd16855ae9b6feded0a1470598404e75e64aa092 mesa-20.2.4.tar.xz + + +New features +------------ + +- None + + +Bug fixes +--------- + +- [AMDGPU NAVI 5700xt] Large parts of the Blender viewport does not render correctly if an object with hair is moved. +- zink: regression after !7606 + + +Changes +------- + +Christian Gmeiner (1): + +- etnaviv/drm: fix evil-twin etna_drm_table_lock + +Daniel Stone (1): + +- freedreno: Add missing dependency to build + +Danylo Piliaiev (1): + +- freedreno/a6xx: Fix typo in height alignment calculation in a6xx layout + +Dave Airlie (1): + +- llvmpipe/setup: move point stats collection earlier. + +Dylan Baker (6): + +- docs: Add relnotes for 20.2.3 +- .pick_status.json: Update to a92f597b98bb032b904c7c8a8c3a9fe798b51915 +- .pick_status.json: Update to 89f6b72f19dbc503386643c6283047bdb1013bef +- amd/llvm: run clang-format +- .pick_status.json: Update to 872c4bcd27db7b7ca26abe9fc090ae26d502156f +- pick-ui: don't grab the mouse + +Eric Anholt (5): + +- freedreno: Fix leak of u_transfer_helper. +- gallium: Fix leak of bound SSBOs at CSO context destruction. +- gallivm: Fix max const buffer count. +- gallium: Fix leak of currently bound UBOs at CSO context destruction. +- freedreno: Break out of "should we free the entry" loop once we've freed. 
+ +Eric Engestrom (1): + +- gitlab-ci: drop deprecated platforms that snuck in when nobody was watching + +Erik Faye-Lund (1): + +- zink: fix layered resolves + +Ian Romanick (1): + +- intel/compiler: Rotate instructions ROR and ROL cannot have source modifiers + +James Park (1): + +- radv: Fix leak in radv_amdgpu_winsys_destroy() + +Krunal Patel (1): + +- radeon/vce: Bitrate not updated when changing framerate + +Lionel Landwerlin (1): + +- anv: fix descriptor pool leak in VMA object + +Lucas Stach (1): + +- etnaviv: fix disabling of INT filter for real + +Marek Olšák (5): + +- mesa: call FLUSH_VERTICES before changing sampler uniforms +- ac/nir: fix a typo in ac_are_tessfactors_def_in_all_invocs +- radeonsi: fix a memory leak in si_create_dcc_retile_cs +- radeonsi: fix a nasty bug in si_pm4.c +- radeonsi: disable WGP mode on gfx10.3 to prevent hangs + +Pierre-Eric Pelloux-Prayer (1): + +- radeonsi/gfx10: flush gfx cs on ngg -> legacy transition + +Rhys Perry (7): + +- nir/unsigned_upper_bound: fix buffer overflow in search_phi_bcsel +- nir: fix sampler_lod_parameters_pan indices +- aco: don't combine precise max(min()) to med3 +- aco: fix combine_constant_comparison_ordering() NaN check with 16/64-bit +- aco: disallow various v_add_u32 opts if modifiers are used +- aco: disable omod if the sign of zeros should be preserved +- aco: fix fp16 *0.5 omod + +Suresh Guttula (2): + +- gallium: update abs_delta segementation parameter +- radeon/vcn : Corrected dpb_size calculation for VP9_2 + +Tapani Pälli (1): + +- iris: initialize shared screen->vtbl only once + +Timur Kristóf (1): + +- aco/optimizer: Only set scc_needed when it is actually needed. 
diff -Nru mesa-20.2.1/docs/relnotes/20.2.5.rst mesa-20.2.6/docs/relnotes/20.2.5.rst --- mesa-20.2.1/docs/relnotes/20.2.5.rst 1970-01-01 00:00:00.000000000 +0000 +++ mesa-20.2.6/docs/relnotes/20.2.5.rst 2020-12-16 21:42:03.500110000 +0000 @@ -0,0 +1,180 @@ +Mesa 20.2.5 Release Notes / 2020-12-16 +====================================== + +Mesa 20.2.5 is a bug fix release which fixes bugs found since the 20.2.4 release. + +Mesa 20.2.5 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is **only** available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 20.2.5 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + 0ebec6a22660d83f5ed437a145dc703c9398cd1376143aef273ea85f9dc5dd98 mesa-20.2.5.tar.xz + + +New features +------------ + +- None + + +Bug fixes +--------- + +- Mesa 20.3.0 and older ATi/Radeon cards fails +- Amber test NIR validation failed after spirv_to_nir +- Amber test validate_phi_src +- [RADV] broken stencil behaviour when using extended dynamic stencil state +- [RADV] Some bindings seem broken with VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT +- ci: Missing needs: in radeonsi-stoney-*? +- Triangles appear from the center of the field on PES2021 with Mesa 20.2.x +- [gen9][iris][regression][bisected] flaky piglit tests +- [Intel][OpenGL] Fail to get correct value when sampling from a texture in depth formats. +- Storing pointer to temporary value inside the Iris driver. 
+ + +Changes +------- + +Andrii Simiklit (1): + +- iris: update depth value for stages after fast clear depth + +Bas Nieuwenhuizen (5): + +- radv: Fix a hang on CB change by adding flushes. +- radv: Deal with unused attachments in mip flush +- radv: Skip tiny non-visible VRAM heap. +- radv: Fix budget calculations with large BAR. +- radv: Fix exporting/importing multisample images. + +Boris Brezillon (1): + +- panfrost: Make sure we always add a reader -> write dependency when needed + +Daniel Schürmann (2): + +- aco/ra: use get_reg_specified() for p_extract_vector +- aco: fix DCE of rematerializable phi operands + +Danylo Piliaiev (1): + +- nir/lower_returns: Append missing phis' sources after "break" insertion + +Dave Airlie (1): + +- radeonsi: fix regression on gpus using the radeon winsys. + +Dylan Baker (8): + +- relnotes: Add sha256sums for 20.2.4 +- .pick_status.json: Update to ec3828add38a83b8c09fd5896265abc9d766162e +- .pick_status.json: Update to f93b7d14d66d8ba70d44772d1a1b6696310b7d17 +- .pick_status.json: Update to 3f0da800eb4b8184c24707c52d5a519abe948898 +- .pick_status.json: Update to cf3fc79cd0ab55776bbbabe76237c272ab07133e +- .pick_status.json: Update to 84c8a35aa2ca4d4de66192933735094ed07b4aaa +- .pick_status.json: Update to a7fb3954a1318a6b27e1405a9e799dd8f06eaa34 +- .pick_status.json: Update to ada9be1ec9e14fc045086411fbf2d3cb0efbbe2f + +Eric Anholt (1): + +- softpipe: Fix swizzled texture gather of int textures. 
+ +Erik Faye-Lund (1): + +- gallium: do not reset buffers for unsupported stages + +Jonathan Gray (1): + +- aco: use UINT64_C on 64 bit constant arguments + +Marcin Ślusarz (1): + +- iris: store copy of the border color in the border color hash table + +Marek Olšák (4): + +- radeonsi: determine correctly if switching from normal launch to fast launch +- ac: fix detection of Pro graphics +- ac: fix min/max_good_num_cu_per_sa on gfx10.3 with disabled SEs +- radeonsi: disable SDMA on gfx6-7 and gfx10.3 to decrease CPU overhead + +Mauro Rossi (1): + +- android: spirv: fix '::' typo in gen rules + +Michel Dänzer (2): + +- ci: .lava-test:amd64 template needs arm_build +- ac: Don't negate strstr return values in ac_query_gpu_info + +Mike Blumenkrantz (3): + +- zink: fix direct image mapping offset +- zink: really fix direct image mapping offset (I mean it this time) +- st/pbo: fix pbo uploads without PIPE_CAP_TGSI_VS_LAYER_VIEWPORT + +Nanley Chery (1): + +- iris: Fix resource ptr in resolve_sampler_views + +Rhys Perry (2): + +- aco: don't assume src=lower when splitting self-intersecting copies +- spirv: fix GLSLstd450Modf/GLSLstd450Frexp when the destination is vector + +Robin Ole Heinemann (1): + +- anv: Add DRM_RDWR flag in anv_gem_handle_to_fd + +Samuel Pitoiset (11): + +- radv: mark GFX10.3 as a non-conformant Vulkan implementation +- radv: fix exporting multiviews with NGG +- radv: disable alphaToOne feature +- radv: fix ignoring the vertex attribute stride if set as dynamic +- radv: fix optimizing needed states if some are marked as dynamic +- radv: do VGT_FLUSH when switching NGG -> legacy on Sienna Cichlid +- radv: fix applying the NGG minimum vertex count requirement +- radv: don't count unusable vertices to the NGG LDS size +- aco: fix combining max(-min(a, b), c) if a or b uses the neg modifier +- radv: ignore other blend targets if dual-source blending is enabled +- radv: disable SQTT support for unsupported GPUs + +Simon Ser (1): + +- radv: fix access to 
uninitialized radeon_bo_metadata + +Tapani Pälli (1): + +- anv: fix calculation of buffer size in case dynamic size is used + +Timur Kristóf (1): + +- aco: Use program->num_waves as maximum in scheduler. + +Vinson Lee (1): + +- meson: Fix Clang microsoft-enum-value detection. + +Witold Baryluk (1): + +- zink: Cap PIPE_SHADER_CAP_MAX_CONST_BUFFERS to 32 + +cheyang (1): + +- android: fix build failure with libbacktrace + +yshi18 (1): + +- iris: fix memleak for query_buffer_uploader diff -Nru mesa-20.2.1/docs/relnotes/20.2.6.rst mesa-20.2.6/docs/relnotes/20.2.6.rst --- mesa-20.2.1/docs/relnotes/20.2.6.rst 1970-01-01 00:00:00.000000000 +0000 +++ mesa-20.2.6/docs/relnotes/20.2.6.rst 2020-12-16 21:42:03.500110000 +0000 @@ -0,0 +1,47 @@ +Mesa 20.2.6 Release Notes / 2020-12-16 +====================================== + +Mesa 20.2.6 is an emergency bug fix release for the 20.2 series. + +Mesa 20.2.6 implements the OpenGL 4.6 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.6. OpenGL +4.6 is **only** available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. + +Mesa 20.2.6 implements the Vulkan 1.2 API, but the version reported by +the apiVersion property of the VkPhysicalDeviceProperties struct +depends on the particular driver being used. + +SHA256 checksum +--------------- + +:: + + TBD. 
+ + +New features +------------ + +- None + + +Bug fixes +--------- + +- None + + +Changes +------- + +Daniel Schürmann (1): + +- aco/spill: only prevent rematerializable vars from being DCE'd if they haven't been renamed + +Dylan Baker (2): + +- docs: add sha256 sums for 20.2.5 +- .pick_status.json: Update to 2d78e28ba702d366becabb8e241b861e6711c76e diff -Nru mesa-20.2.1/meson.build mesa-20.2.6/meson.build --- mesa-20.2.1/meson.build 2020-10-14 17:19:10.276516000 +0000 +++ mesa-20.2.6/meson.build 2020-12-16 21:42:03.525110000 +0000 @@ -992,10 +992,6 @@ cpp_args += a endif endforeach - if cc.has_argument('-Wmicrosoft-enum-value') # Clang - c_args += '-Wno-microsoft-enum-value' - cpp_args += '-Wno-microsoft-enum-value' - endif else _trial = [ '-Werror=implicit-function-declaration', @@ -1064,6 +1060,11 @@ cpp_msvc_compat_args += a endif endforeach + + if cc.has_argument('-Wmicrosoft-enum-value') # Clang + c_args += '-Wno-microsoft-enum-value' + cpp_args += '-Wno-microsoft-enum-value' + endif endif # set linker arguments diff -Nru mesa-20.2.1/.pick_status.json mesa-20.2.6/.pick_status.json --- mesa-20.2.1/.pick_status.json 2020-10-14 17:19:10.226515500 +0000 +++ mesa-20.2.6/.pick_status.json 2020-12-16 21:42:03.478109800 +0000 @@ -1,5 +1,21956 @@ [ { + "sha": "2d78e28ba702d366becabb8e241b861e6711c76e", + "description": "d3d12: Flush and wait in flush_frontbuffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0507da62c47ad6ca0b5c4f9bb2209d773c3e444c", + "description": "gallium/dri: Add D3D12 software driver option", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eb4353838d7b54f0811d171da9afad5416b04059", + "description": "d3d12: Don't require DXIL for WSL", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"0b60d6a24d405066c4077154095a76204c103cc1", + "description": "d3d12: Support Linux eventfds for fences", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ee4c80f77df6310807db6a8f29b1389a7c3a2c43", + "description": "d3d12: Only support DXGI and GDI APIs on Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ed286325a0ecbf5fceeb9c8f5d5fc677154fad00", + "description": "d3d12: Only play DLL path tricks on Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dfeb5ecd4ac28b7189bd311cce34a495a7249972", + "description": "d3d12: Use u_dl instead of Windows DLL APIs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "29ccbd9b1e59eebb139008f9d292a0c6f01ca770", + "description": "d3d12: Add forward declaration for LUID", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "738564a75318d6d90c810abb874995ab9a735a34", + "description": "d3d12: Scope down wrl includes to just client.h", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "af2e212c598d41d2dd6b1da85537c822198edd82", + "description": "d3d12: Use IID_PPV_ARGS instead of __uuidof", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e1e05ad772e8872af1f0ef420640d02c11784271", + "description": "d3d12: Include dxguids/dxguids.h in files that need __uuidof", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "da9b6a21f4d0b018eb487d80528f8aa326afe362", + "description": "d3d12: Include 
wsl/winadapter.h when not compiling for Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8fc2676e47d50b561efc4b6c5b932aa637226c4a", + "description": "winsys_handle: Change D3D12 resource handle type to void*", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4ee55769d6d6991a88513b0de9fc8e3e06543acd", + "description": "microsoft/compiler: Pick up new dxcapi.h", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6736480035281f9d37a69a3f65114226a5ac9a8", + "description": "d3d12: Add DXCore screen variation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "50fab5da93ad05817f8bc33abba759cc0cb3d0ef", + "description": "d3d12: Refactor screen to abstract DXGI details", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bc5f69faee969cf32fdb7cd31b8b12206d82a77e", + "description": "d3d12: Use DirectX-Headers wrap for d3d12.h", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c37d8ae82b9d6ad30ebffbc8bf043afa0f5b6cc5", + "description": "egl/wayland: add a NULL guard for the authenticate callback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a709d99bfd7982bcb15d2c98bea5bc8de91ea0a7", + "description": "st/mesa: set drawpixels swizzle before creating sampler view", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "d11fefa96165836ffeed531a74319a64aa98a696" + }, + { + "sha": "ba74e1be22f646f9639e85b12c7707e96351a075", + "description": "zink: fix 8 bit 
index handling code", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "f4583b40863e3e57fe0df60daf1d5ab5ea8ebbae" + }, + { + "sha": "008bf6ca61081fbe9025a6bb1facf3f82b08106e", + "description": "zink: ralloc spirv_shader", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "03ccd3c4687ae401353a72a116b0d4f7de560c88", + "description": "zink: use emit_bitcast helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "134df0069fbc5fb956865c7777ba16663333cb10", + "description": "zink: use shader-read-only-optimal for samplers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1a4c4cd110380c99c51e57c59c868beac01234d2", + "description": "driconf: add workaround for Enter The Gungeon", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "296316b5dec2d3512408859c38f50dd308a75bbc", + "description": "st/mesa: disable line stippling if pattern is all 1's", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "967ae12931e0dc12855de903851222b2f0607878", + "description": "Revert \"st/dri: make sure software color-buffers are linear\"", + "nominated": true, + "nomination_type": 2, + "resolution": 0, + "master_sha": null, + "because_sha": "040a2643c08dd0accee6942bc05836b99417ee49" + }, + { + "sha": "1fd5e7b14eb9130d22f0a6ff219af1680734415a", + "description": "zink: force display-targets to be linear", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "87d7568d69199d1fa16858b45521764d31815aa9", + "description": "swr: Fix building with LLVM12", + "nominated": 
false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "27097ca6b52f3b84a8fd8f7427487440980fae29", + "description": "radeonsi: improve a comment about an MSAA bug workaround", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3b9fb98c4b6bf5d9a5cabf55b47efee7c712769a", + "description": "radeonsi: disable NGG fast launch with indexed triangle strips to fix a hang", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "aaed7a29bef6637c712c459f84ec6ec7911f1300" + }, + { + "sha": "dffc27e5e10350c371bb32b5b49facc8b30b2f16", + "description": "radeonsi: fix small primitive culling with MSAA force-disabled and smoothing", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "7648060dc03775979e3fa8904c4948c084e82b6a" + }, + { + "sha": "836b9e1d8831eb9ec3b1652bc041d70e1e0db71a", + "description": "glsl: Fix typos in comments.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "001411774d203007850a157900c9cd9d0307a417", + "description": "lima: adjust pp and gp max const buffer size", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9ed50cba6e8561128a98dcf6ac8a98cef94591aa", + "description": "gallium/swr: Fix Windows build", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "9ac8f8f490d0ee08ac9ca6f1c6963043c2991443" + }, + { + "sha": "5228847c02749079c765cc143064b29737aa93a4", + "description": "CI: Add repeat-wait to Windows Piglit skip", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0e215ff487d23d4bf040e368ec08ef08b7ac4cb3", + "description": 
"d3d12: Fix incorrect fence timeout calculation", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "2ea15cd661c8355e8e35624eba0bf10cbcd57f61" + }, + { + "sha": "f3e33dee07073f0ea25bdf68e335a1ee19b2bcb6", + "description": "ci: Add an ASan build on x86.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6f52386544a38b6d1628ae8c9e17875a31e7e8d4", + "description": "amd: Fix leak in ac_surface_modifier_test.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "26198e875a39625e413dd1f1ee0e4cf4a3d93646", + "description": "driconf: Fix memory leak in the unit test.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0d23f4e56fb6ec8b1b13fd48d937850d87ad4f26", + "description": "glsl/standalone: Fix memory leaks", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a8fb7ac1c4f2cee6b2f772efe2a0b503a3480df9", + "description": "mapi: Fix symbols check with ASan enabled.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "39dbf47bfe75231329fa29eb21465d915ba06bc4", + "description": "glsl/uniform_initializer_tests: Fix memory leak", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "49c22c21557fc9289337a861a8b1e4aecb65b095", + "description": "glsl/general_ir_test: Fix leaks.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1d026e22752eb4138d46787d41ebfa2527e449fa", + "description": "disk_cache: Fix memory leaks in the unit test.", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6665eb0531e2ada489278be0104b52b2e99b185", + "description": "freedreno/ir3: Free the compiler at the end of the unit tests.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ba8ce62d3cc3761776a666d5e38e5bac6f99bb53", + "description": "glx/tests: Fix leaks in the unit tests.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a3a1a65c09114eea20dc616f189233164ce80e79", + "description": "glx/tests: Remove unused teardown function.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "867e7f7cf4e3e117588612e36bb749b3ec8414af", + "description": "util/vma: Fix leak of the heap in the unit test.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e640a9ca79f29814235fff388c5460d9b12cb529", + "description": "util: Fix memory leak in a hash table unit test.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "547d11de54c6863dc73446181b80f4f368808c3c", + "description": "etnaviv, v3d: Fix valgrind include paths.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ada9be1ec9e14fc045086411fbf2d3cb0efbbe2f", + "description": "radv,aco: Compile with -Wimplicit-fallthrough when available", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6ba83d820c17719d3c1500103df1bcc4efa45f06", + "description": "aco: Annotate switch fallthroughs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, 
+ "because_sha": null + }, + { + "sha": "22790ef3d4defa1b34c3be7d131e1cc49b272d80", + "description": "radv: add support for resolving layered depth/stencil images", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0e9c231aef8f325eb24bb4346e8c1ab292e024c5", + "description": "glx/xlib: Build fix", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "53f7d539cd93aff35f64a113a7f22fd91f1eb841" + }, + { + "sha": "78c7edf962a8cd1dfd40fd1cddfe0231942e9599", + "description": "zink: Simplify MoltenVK support a bit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0781808935e161b1731da944ffe8e2a4c681f9d6", + "description": "zink: Factor out zink_create_logical_device", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aa93673d9d174a6fdeeff51398bfa4b98a0c441e", + "description": "zink: Factor out zink_get_loader_version()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5f7a8f6fc1d2391937b06ef870c486c8c4c36ba1", + "description": "zink: Factor out winsys awareness from zink_internal_create_screen", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a2f1354bdc53628125cec7dbcc4dd1bcde7e8d01", + "description": "zink: Fix indentation in zink_create_instance", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c314893988d4b7408383d5c0357319082c347fc6", + "description": "gallium/util: add bitcast helpers for double and uint", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"615c4610bb6882decc384913b28545c3cb945a1a", + "description": "gallium/util: make bitcast-helpers explicitly sized", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0ba788d73b47aa9c40c692c95e756a39c151bf7d", + "description": "etnaviv: tex_state: fix miplevel selection", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d21cc94201ab1e6cb54ff33a7a835248fe336777", + "description": "gallium/dri2: Don't forget protected content flag", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "bd182777c8f2c4468568901ce92b1796674eb87d" + }, + { + "sha": "df76963a5cd56c677f189ad89d75935a76fca090", + "description": "anv: Add DRM_RDWR flag in anv_gem_handle_to_fd", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "94687ee59f0e027f2957b7420f2ac973054b6d74", + "description": "vulkan/overlay: add new options to display device/swapchain-format", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "af9481cb8904ef6207733270d6acc7d011955a6e", + "description": "vulkan/overlay: don't display frame numbers unless required", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a2fb87eea6d45ac02c9e669699481711ae70ef50", + "description": "egl/android: implement image cleanup callback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a1cf065e0a1aba0deb7c1fee0ed4e720468d28c9", + "description": "i965: plumb loader image cleanup callback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"d7814d6e0cc8ad10b4568e34d06afad95993e936", + "description": "frontend/dri: plumb loader image cleanup callback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "479840459f9562afaecb150537da3173c8735b18", + "description": "dri: add image cleanup callback to loader extensions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "80a028d830e2198a839e346b3b845999680c92e6", + "description": "egl/android: don't pass loaderPriv in get_front_bo", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c0f2a19aeebf9549420709c53e9acfccd895e9c3", + "description": "zink: Fix typos.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9ac8f8f490d0ee08ac9ca6f1c6963043c2991443", + "description": "gallium: Add optional pipe_context to flush_frontbuffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "23488c35152c58a3f15bde6ade658959de94ff9a", + "description": "aco: allow divergent mbcnt_amd masks", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "feee375db997ca662f425fbdad466624c29ddfa6", + "description": "aco: fix mbcnt_amd with wave32", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2ffae5a439bff248fdf5ed77dfbdf98059b4069a", + "description": "xmlconfig: Disable WITH_XMLCONFIG on Windows", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "1618bd1bee6c80a3614c980f6649ae5a13bfeff4" + }, + { + "sha": "a7fb3954a1318a6b27e1405a9e799dd8f06eaa34", + "description": "glx: fix 
spelling issues", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "618e480f3854d8e79187e16152c9cee47146258d", + "description": "glx: lets compare drawing command sizes using MIN3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ef4101d6d73614f4f41708050f963d6038f91e25", + "description": "aco/spill: only prevent rematerializable vars from being DCE'd if they haven't been renamed", + "nominated": false, + "nomination_type": null, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0bccfd86f6e97611a3b9b4f227aa414bc3d04e02", + "description": "aco: fix DCE of rematerializable phi operands", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "d48d72e98af9436babeeb3a94b312f94bc582b36" + }, + { + "sha": "a791c1f3a714cd54a3a97067c911bcfb5b4421f5", + "description": "radv: advertise VK_KHR_fragment_shading_rate on GFX10.3+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "77343576ebd1880d59f66d5b76befd24defe37c0", + "description": "aco: implement a workaround for gl_FragCoord.z with VRS on GFX10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "45524afe95dbbc517677807b7abb941031d4cd4d", + "description": "radv/llvm: implement a workaround for gl_FragCoord.z with VRS on GFX10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7a464f4296a73df89755323e03646ccbf835d66c", + "description": "radv: track if VRS is enabled to apply a workaround on GFX10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"c587eaadf6ffc321165b9ff51cf2bd700599d412", + "description": "aco: implement fragment shading rate", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0bac0b7f19bb74fe8e99edec87331cb6c7a9e64e", + "description": "radv/llvm: implement fragment shading rate", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf69d89b5a74b3bf930ecbbbeeaf44767136ec1b", + "description": "radv: implement VK_KHR_fragment_shading_rate", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d8c1931ca93b35c9b236b47e58e95ab0233f2b53", + "description": "radv: add VK_KHR_fragment_shading_rate but leave it disabled", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9770ffb07ccd148387e9ec9b65854fcce492834b", + "description": "amd/registers: add missing VRS registers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c4217ef2fc73576aee636d7c8cc4e4d7e94f33cd", + "description": "aco: don't create dead exec mask phis on merge blocks", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a5f4be4fcd54d7f3c3823b534220ab579c210231", + "description": "etnaviv: remove imm_ prefix from etna_shader_uniform_info members", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fefbafb6ff305437ae7d4e47326a4c68247de3cd", + "description": "etnaviv: rename from immedaite to uniform in some places", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ad19b0714a8d3cb51121b09b6e3915439d5f3452", 
+ "description": "radv: fix access to uninitialized radeon_bo_metadata", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "d5fd8cd46eeedeabf8647c06489a755aea8f0080" + }, + { + "sha": "614c77772ac2f48955537efcfefaf0609d6c03e5", + "description": "st/pbo: fix pbo uploads without PIPE_CAP_TGSI_VS_LAYER_VIEWPORT", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "c99f2fe70ec6a9273786d123fa4ac9924b5911b2" + }, + { + "sha": "dfd0f042e0d8faa374629ea0a67422edff6d48c3", + "description": "zink: really fix direct image mapping offset (I mean it this time)", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "456b57802eaf95d36e7b950bdb5ffd86a1c0dc63" + }, + { + "sha": "ab79e6b8e33689857509c417c37f34c40f72ad45", + "description": "ci: skip failing test on lavapipe", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6a34a686ea953cb64c656518c239998cef78dba9", + "description": "zink: fix property detection", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "a103666b0209907e4c9cbfe8b7e737b0e3cd0aff" + }, + { + "sha": "d7911f9c6b0510b74c96306f748ff19d0f4807ba", + "description": "mesa: Remove cmd_size < 0 check.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "56cb5cf34e8edbf863ff5577efe851852472e9bd", + "description": "intel/genxml: Avoid generating identical 12.5 and 12 branches.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "73c33803359dcb378c040abcc3f517fa4ea65646", + "description": "mesa: Remove extra texObj.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": 
"d0e18550e26209a01465c85382c86a2c9246db0c" + }, + { + "sha": "d3d28f6c2d1795d391442249343d8cd38356665d", + "description": "nir: Redefine start/end_ip of blocks to fix NIR-to-TGSI liveness bugs.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "a206b581578d585d845250f62dfb1e6684ddf2f0" + }, + { + "sha": "6916d1d9247482c851e1a76e1017d04431d44268", + "description": "gallium/ntt: Drop reindexing of SSA defs and regs.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7956c788af229f31c2e0f6a9600690ef735a608e", + "description": "gallium/ntt: Don't manually reindex instrs.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "41f7fa273d21566d82a6376fb3ad4e44c5e57d33", + "description": "lavapipe: add support for VK_KHR_descriptor_update_template", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6be19765cf238315d1d88c4e474a77e799820b39", + "description": "lavapipe: add support for VK_KHR_push_descriptor", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e7cb57d73e771a2f8c86c46a4ba8a33da3874c47", + "description": "lavapipe: refactor descriptor set binding to support push later.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fb7ccd3b23538eff70b5e75d518ffb01bbd06ade", + "description": "gallium/u_threaded: fix pipe_resource leak for staging transfer", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "2900f82e1919dcb70d29f34e5ed10a09f7356b3e" + }, + { + "sha": "d9dd8288aa0fc273fd56b87c41f459e29aa385ab", + "description": "lima: add support for half float textures", 
+ "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aca67a555c011f59c37b05544eecbeea9aa42f3e", + "description": "mesa: allow half float textures based on ARB_half_float_pixel", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "456b57802eaf95d36e7b950bdb5ffd86a1c0dc63", + "description": "zink: fix direct image mapping offset", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "8d46e35d16e3936968958bcab86d61967a673305" + }, + { + "sha": "84c8a35aa2ca4d4de66192933735094ed07b4aaa", + "description": "CI: Add Windows source dependency map", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "533f6debb17b02ee9f92f51c19a2da9c0700bca1", + "description": "nir: change return type to void", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e8bf15d107d5d53bc1bf83d2849651db99de8fa1", + "description": "turnip: pCounterBufferOffsets can be NULL on vkCmd*TransformFeedbackEXT()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c0e7a2cd0229bfaae030c266dd847cdcd21a95af", + "description": "mesa: Remove silly \"dummy_false\" extension support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fd49ba59a3888795ad0788259e3472e08c42bc24", + "description": "aco/ra: use get_reg_specified() for p_extract_vector", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "51f4b22feec3720c89458094a3245efc984115ee" + }, + { + "sha": "731f8fc9dd5018e9ee55982207b70130ab72d22b", + "description": "aco: Use program->num_waves as 
maximum in scheduler.", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b54851281354f6feba5921c94b84aa11133e6274", + "description": "nir/opt_access: don't ignore infer_non_readable", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "d7d0b4445ae7a80fff59f91c04b124c5ee40a82b" + }, + { + "sha": "fe669ff4a023a98865b5ab6538613e3ae4c4eae9", + "description": "zink: replace old code with generated zink_instance", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "52138ea9b383ff7ca604e93d78c86eabc4266117", + "description": "zink: hook zink_instance to build", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2b4fcf0a063f32d7000e9f4c6b8e398867d3b21c", + "description": "zink: generate instance creation code with a python script", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "12656de341a778dd4b9fbdd753421b8e4b010510", + "description": "zink: factor out GET_PROC_ADDR and friends to zink_screen.h", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f58d933ea8358ea80b374a5e23063e9bcc6e091", + "description": "zink: allow Extension/Version to be shared across files", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4ad7541cf0780a8611fcb3a88b23f0179d24f269", + "description": "mesa: check for extension instead of desktop GL", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "47eb9b865d0d8a252546b9c9cba5dcda8fe10c74", + "description": "mesa: do not allow es2-extension 
enums for es1", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4eaff8dba72f78f8820f003d5a2ac322b549f851", + "description": ".gitlab-ci: verify that Get-Content worked", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9eea5213910e691a5bc8183bb18d6b68aa8a4525", + "description": "gitlab-ci: copy piglit expected results to artifacts", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "195a001d7366a23c569c38437390edc5ad1c25e9" + }, + { + "sha": "83d1e2efd0e89191da80b62c048a9b7a471a86a3", + "description": "android: fix build failure with libbacktrace", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "848e7b947d0d505d54d27780b052e5532c721678" + }, + { + "sha": "42fd7e3457becc8a6c61132de70d5e70ad11ab70", + "description": "v3d: extend the list of formats supported by the TFU unit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4117f08226fac9a95de88b62d18fc0e9d9b57abe", + "description": "panfrost: Fix several depth/stencil format mappings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7ca72f172678116d29d254b786a9422b864aef3d", + "description": "gallium/util: Fix depth/stencil blit shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e15fd6c36ee2614e006238bcf8121770602226ff", + "description": "panfrost: Reload depth/stencil when they are read", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "94009cbebc73651bab81a97202d7337829569fd3", + "description": "panfrost: Force 
->s_writeback_base to ->zs_writeback_base for Z24S8 buffers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "27d622cffe32b45d73e866ac1a03d422a536d51d", + "description": "panfost: Fix depth/stencil writeback on Bifrost v7", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "27af1352803bed4f78744d36638dff28d04decbb", + "description": "panfrost: Allow 2DMS arrays", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "727cc80fe3e19371da6cac9d7c92f99acc853ddc", + "description": "pan/mdg: Fix texture handling for 2DMS arrays", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "77b67a747efcdf5b3bff31cb38c26a2941705475", + "description": "vulkan: Enable VK_KHR_performance_query on android", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "574429f9528734f078c8c3a56232ba66fa818913", + "description": "intel/blorp: Drop support for STC_CCS resolves", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0f369e1472438c71b0c3a9d1bb4ceb1d9b8c8a23", + "description": "iris: Stop using blorp_hiz_stencil_op", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ac0d393eb18deec397efd8d5f40faf38dfeb12e4", + "description": "CI: add lavapipe vulkan testing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cf3fc79cd0ab55776bbbabe76237c272ab07133e", + "description": "st/mesa: Replace mesa_to_tgsi() with prog_to_nir() and nir_to_tgsi().", + "nominated": false, 
+ "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e11513b74090c4d8d48c952c3d4c4b4402de1e3c", + "description": "ci: Make sure that osmesa stays warnings-clean in release builds.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ee802372180a2b4460cc7abb53438e45c6b6f1e4", + "description": "mesa: Retire classic OSMesa.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6ffc5611c8a9ba61387f571449d2290aed2266dc", + "description": "docs: Fix the documentation of the OSMesa path.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7497917cfd8972849c611087366a027fecdea80b", + "description": "osmesa/test: Clear the stencil bits in the depth test.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a9a8e05b696ec756c87be7a0f5c7a0aafc310904", + "description": "aco: Skip TCS s_barrier when VS outputs are not stored in the LDS.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "790144e65ae55d44698fc02a670964175029e347", + "description": "util+treewide: container_of() cleanup", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6fe84c5dda2f5935787c428347cf16ec1b1a5a30", + "description": "util: Allow STATIC_ASSERT() everywhere", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5a082911cdaf9a7b04d14c8ec60ad83cd063a524", + "description": "util: Promote __builtin_types_compatible_p compat", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "f63c595a73698e96232f6465be01f3449022af9d", + "description": "swr: fix crashes caused by incorrectly reporting SSBO support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5d833f433afe955cc01edc372c17f0059ab99492", + "description": "ci: Only run the sanity job if there's a MR", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "d4151f2efd09254dc8f45dca761f0f83af93d97d" + }, + { + "sha": "f27ee2a3ad9b9fcb9fbc154485c0d0a60f3a7221", + "description": "zink: remove useless import in zink_device_info.py", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3f1d4de25b22fe377c018feedf01d77d9da8e632", + "description": "zink: move blend_operation_advanced conditions to zink_device_info.py", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a103666b0209907e4c9cbfe8b7e737b0e3cd0aff", + "description": "zink: decouple features and enabling conditions in zink_device_info.py", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6e313bc9181a6619698d8478d1c02c4584de2602", + "description": "zink: define and use <%guard> helper in zink_device_info", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f0ca77c4787da84487572aed9551f8c05a92d618", + "description": "ci: Move deploy stage to the end of the pipeline", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d4151f2efd09254dc8f45dca761f0f83af93d97d", + "description": "ci: Run sanity job only in pre-merge pipelines", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ef75ede4edf1f603a25e291cee15ae5706a5147c", + "description": "egl/wayland: remove libwayland < 1.18 workaround", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f5711ae7d2b6ff65085bd0db485fb8e58df7f496", + "description": "zink: use _mesa_pointer_set_create for simplicity", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8a30ac49ac72275d4b123807fceb175658203f95", + "description": "zink: fail if set failed to create", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "a03d17ede778610f2c66099d0d5342cf09ef12a2" + }, + { + "sha": "7c85b1d2f0ff951de741c480cfd6e00ba3eb5486", + "description": "gallium/u_threaded: set has_user_indices = false for merged draws", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "351ba767afe5db4603a60be33fb78ec41630f502" + }, + { + "sha": "5eda9673b2347aa33713790b3a04b8363862db2f", + "description": "radeonsi: fix si_get_draw_start_count count value", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "0ce68852c1a8d37e837d64f97a2a2ef5d6b669a5" + }, + { + "sha": "5d073b5aa160adfe4c5e0b388bd199741f4893d8", + "description": "ci: .lava-test:amd64 template needs arm_build", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "6c8b921572a542f5de62475a76501a9db73dae4d" + }, + { + "sha": "cd4f6cdcf44c4f789aaf5b5e2b19bb64edc7f259", + "description": "docs: Adapt to FDO_DISTRIBUTION_TAG \u2192 MESA_IMAGE_TAG rename", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "0781d9825b31d55aa350dfe158a314eb663e9c5d" + }, + { + "sha": 
"01175c38c2db582289d6798eb5fdbe5e559331a6", + "description": "ci: Adapt armhf_test job to MESA_TEMPLATES_COMMIT related changes", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "0781d9825b31d55aa350dfe158a314eb663e9c5d" + }, + { + "sha": "6701662ae0576f26e62fd23a07ba1bc16b938970", + "description": "ci: Add .use-base-image template", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "0781d9825b31d55aa350dfe158a314eb663e9c5d" + }, + { + "sha": "f957d0d915c246ddbc7c52361b6eac47c8a094cb", + "description": "ci: Move BASE_TAG expansion to FDO_BASE_IMAGE assignment", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "0781d9825b31d55aa350dfe158a314eb663e9c5d" + }, + { + "sha": "5998a6543a6f3116b6c77ace4f9bcde382e90138", + "description": "anv: fix calculation of buffer size in case dynamic size is used", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "b9a05447a1976101c04a02f5588c51de0b0f6573" + }, + { + "sha": "3f0da800eb4b8184c24707c52d5a519abe948898", + "description": "lavapipe: implement VK_EXT_vertex_attribute_divisor (v2)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5b0b03733a4fdc5b9cd485bd313810b37c81b2a9", + "description": "lavapipe: interpret inputRate as an enum-value", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eec9d67e4478b5330584970d456f02ff4c15c0b3", + "description": "lavapipe: don't copy pNext", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6a27262cc2fbdb3007e2dc20f6534db0312c5810", + "description": "lavapipe: split out pipeline struct duplication to a macro.", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a2cf0590234e856b5e7f6f6b0d7fc1d3f3a3fec6", + "description": "lavapipe: use ralloc for pipeline copies.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "04c7fce799ac18f4c2ab147c0a9780483befe527", + "description": "iris: Drop res variable in resolve_sampler_views", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1f7427f97216ce63308e810c79b0280a69ef43fc", + "description": "iris: Fix resource ptr in resolve_sampler_views", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4ba884b8140fc64d433959cbe9ef46fa12f4fd07", + "description": "softpipe: Fix swizzled texture gather of int textures.", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2b09bde1f5450152ce121a5f58943e01223ff783", + "description": "radeonsi: use a C++ template to decrease draw_vbo overhead by 13 %", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6347b0b5c4d886771da17ea5fed5336556b855d5", + "description": "radeonsi: rename si_state_draw.c to .cpp", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "639b1366d020eb2bc08679ac736435aebf6e44f4", + "description": "radeonsi: resolve a tricky C++ failure with goto jumping over initializations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fe839baf6afa30f1504006d0b03b59b3981d0615", + "description": "radeonsi: fix future C++ compile failures and warnings", + "nominated": false, + "nomination_type": 
null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "85af48b0ee960177fbff64e848957a52417588b7", + "description": "radeonsi: allow including a few files from C++", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fc212dcaa58c152c2c70f33d50021cbbc7bce0be", + "description": "amd/llvm: fix C++ compile failures", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f95fb3ab9c23e852530aa420c398d9c98a152efc", + "description": "gallium/util: allow including a few files in C++", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ea23ca8869a1da9b464258a0505ec3bf1ff11ebf", + "description": "compiler: fix glsl_types.h compile failures when including as C++ in drivers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3d417121937f78e405ebb33dc6bbd0373689bd73", + "description": "ac/llvm: handle no_(un)signed_wrap NIR flags", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3b67c6451f2258520f68527668f41685a3b34147", + "description": "ac: unify shader arguments that are duplicated", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "248268fb7d32b758f264035b120dbfffe3f862d8", + "description": "radeonsi: move si_llvm_compiler_shader and deps into si_shader_llvm.c", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8cd15226221696a3a1c4bec3c26b5f256321b0ca", + "description": "radeonsi: move si_build_main_function into si_shader_llvm.c", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "273be1686ef5227a48faf9c3af248d827b02141c", + "description": "radeonsi: move si_create_function into si_shader_llvm.c", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d9c1a47d48ea32c055e58fee890d943a78361373", + "description": "radeonsi: split ac_shader_args initialization from LLVM code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4a50096ab47681e95c1700fcee83a9418e5300ce", + "description": "ac: add shader return values into ac_shader_args", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2cf44ad30aef5f257401f64958800dc3012ef9bc", + "description": "ac: correct ac_shader_args types, remove sgpr_count", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "50dad7366ee09484b56727559867cc2c64bd8546", + "description": "radeonsi: fix a crash in si_fence_server_sync", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "1f31a216640f294ce310898773d9b42bda5d1d47" + }, + { + "sha": "b08343c404bc0a255eb71abdd5d46abedf935397", + "description": "aco: rename s_subb_u32 operands to borrow", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f4e649a205bbe2884d959c65beb86f34c3ab5db8", + "description": "aco: fix various s_subb_u32 operands to SCC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1a535722d31d967c934f7b4d5485de34ae120200", + "description": "v3d: use job's nr_cbufs field", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": 
null, + "because_sha": null + }, + { + "sha": "e95465ae0eff1fb96b919427d9813f8543371626", + "description": "panfrost: Fix provoking vertex selection for lines", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6b9f943df678a811eab572a0eae389ddf664f830", + "description": "panfrost: Preload SampleID when reloading multisample FBs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dec4d15e6773ff9fd6b6d71d8f393419ae556a94", + "description": "panfrost: Take the number of samples into account in blend shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e27052281aa97f17df5b221337b1c5f3e7464b61", + "description": "pan/mdg: Add support for multi sample iteration writeout", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "29f938a0ece889cd3236fca7e008bf0031de4be2", + "description": "panfrost: Fix fencing", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "64d6f56ad26f084a44a0f5491fc512a65d40df91" + }, + { + "sha": "387221e4f2b9339633f84d8b91774ddd302ed60a", + "description": "panfrost: Make sure we always add a reader -> write dependency when needed", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "c6ebff3ecdde46ef7925326522395b4a50dd65a3" + }, + { + "sha": "c30af744b1f850125d31ce6a15499003262972c6", + "description": "st/mesa: enable compute shader derivatives in SPIR-V", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c5ae01dcf12b8b70d7ca181f82012e4c9d3fc8e5", + "description": "ac,radeonsi: implement GL_NV_compute_shader_derivatives", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d60930c0171501de6fd453101275bad133481e0a", + "description": "winsys/amdgpu: use VRAM for command buffers if all VRAM is visible", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "14f85e1874a1f1c9c8f35906ac1e8dfca7f5a3e6", + "description": "radeonsi: map PIPE_USAGE_STREAM to VRAM if all VRAM is visible", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "913c06f501162ebcdb5ba2381ca12c98cb0ddfe9", + "description": "radeonsi: unify uploaders and upload to VRAM if all VRAM is visible", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6fecdc6dda6da15d616a31900508214c81cd256e", + "description": "radeonsi: only use staging for linear textures when all VRAM is not visible", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3404c3111e0537b234f1cec4c39b7e82edff7b6c", + "description": "radeons: only force staging uploads for VRAM when all VRAM is not visible", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d3d6d38145047a9d531be0e4aba71ab788259b41", + "description": "ac: add radeon_info::all_vram_visible for Smart Access Memory", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8bd25d3835d4399aba2c27e7f82681a9005073e9", + "description": "radeonsi: initialize ctx and gfx_cs first, then allocators", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "443b628e6f21f7fe3a52002c0fd89826d206048a", + "description": 
"compiler: update gl_access_qualifier comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d7d0b4445ae7a80fff59f91c04b124c5ee40a82b", + "description": "nir/opt_access: infer writeonly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5d524ae62c839f02cf2cf0bca4ef232baa21e67c", + "description": "nir/opt_access: consider global stores", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c9ec7d3f90fc55e17a14016c86cffb9b7dc215ec", + "description": "nir/opt_access: handle variable pointers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4dc565946366b3c80c0661c695f47d1ee1ccb89d", + "description": "nir/opt_access: add basic Vulkan support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d587dc32f37f43aa277516a69ad673db8441ffb8", + "description": "nir/opt_access: rename can_reorder() and set ACCESS_NON_WRITEABLE in it", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "939df4e364cb8474d79dd8107db24a30cb537027", + "description": "nir/opt_access: don't check restrict in can_reorder()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2448d13e0fcfd993001e1eb231b2364db602f406", + "description": "nir/opt_access: check restrict before marking a variable as readonly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e646f6d30e0cbac905356e8229d114abb9648ebf", + "description": "nir/opt_access: ignore barriers and coherent 
qualifier", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7ddea94819ccf362b8bce036334b65951f8b2829", + "description": "nir/opt_access: don't ignore image arrays in process_variable()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bdd09066fa5f022daa0428325a04a52d66b5e41b", + "description": "iris: update depth value for stages after fast clear depth", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7ad7decc27c46bd6d113b0e7a9d728f3a3911a11", + "description": "v3d: remove old tile blit code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1c76f6e755a5ec0ad8fb37f173cbd47dc8c9cd2b", + "description": "v3d: implement tile-based blit operation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "904f6b92ef93c3bb41df2f8898323e0f927dcc9c", + "description": "v3d: refactor set tile buffer size function", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "487dd96c4fcae4ff23baa7be4135483dddb0ea0a", + "description": "v3d: implement tile buffer blits", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9eb2517a88e3b48cd84fa4b0c50112dc6de2da06", + "description": "v3d: add helper to check if format supports TLB resolve", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8b3bc4e2fba0589bd29cca74ea8d1633e48faace", + "description": "v3d: split binning start from draw", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "1ebdeb3c4307e5ab3ff6bdbdadfb274c061ef5dd", + "description": "v3d: store number of color buffers in job", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e3d0abcb2a547ccf10e5e07b5212d8f0e75c6ee8", + "description": "v3d: make set tile buffer size function public", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "01aafb2859ebd5a5e483d0eb86b1c73b72ebdaed", + "description": "v3d: force alpha to 1 when rendering RGBX formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b243ccb060dfdad849b9abc4749556e6fb3a83b1", + "description": "util: function to check for rgbX format", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aff845da6709941beec96d9362972641d2c4a955", + "description": "intel/tools/aubinator_error_decode: fix small memory leaks", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "00cd3f7d130144823c7dfcb2882b8edc7864b413", + "description": "intel/tools/aubinator_error_decode: cleanup path/file handling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "34a35d8f2e0a90a64096fcb2372d2f137b91adb1", + "description": "r600/sfn: Initialize ShaderInputVarying members in constructors.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d4ae1950de1f0cb14094de9287517f9ae55e35a8", + "description": "meson: Fix build with llvm-12.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { 
+ "sha": "bc3225272aeb363ba03cdd3319081de1239a0521", + "description": "ci/vc4: Skip VS dynamic loops tests that cause GPU hangs.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e096b538248045d76290f058b20a6c97266dd44c", + "description": "radeonsi: fix the blit test for SW_64KB_R_X", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "21b97ef013412e646efd66817fe512b324e308b3", + "description": "radeonsi: rename SI_TEST_DMA to SI_TEST_BLIT", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1f31a216640f294ce310898773d9b42bda5d1d47", + "description": "radeonsi: remove SDMA support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5b81194fee22f6b22f3448d445acd38b647cd1e8", + "description": "radeonsi: rename buffer functions so as not to reference rings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ab1377cf9267d9ff79a3b6b8da0dfe8b1784b2a2", + "description": "radeonsi: move si_screen_clear_buffer into si_compute_blit.c w/o SDMA option", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5fca7cd8b874562858e72d7a21e685c962f864de", + "description": "ci/freedreno: Detect the cheza power management bus error and restart.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "185df8ef07f92ae3e89b3aec912cc5259904ab50", + "description": "android: spirv: fix '::' typo in gen rules", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": 
"1070bba19e26640bc1dbfd04180e97217761404d" + }, + { + "sha": "9d22c438cba0c54dcca829ae1e17bf5c4b36b43b", + "description": "mesa/st: set reserved storage for params+values to 16", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "eda37fb2698f3f0d51ea6dd16b80b7b8ba032a89" + }, + { + "sha": "869a6274a85b9e18341e951d1111a667edbe0a68", + "description": "meson: fix multiline string warning", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "445b4d13bde1a00546c41fe82745cde4446d9f7d", + "description": "util: Add cnd_monotonic to Makefile.sources", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "33a78948283c9911a06650bd2ba5467678249cfd" + }, + { + "sha": "0bf7f7fcd2f6a8b5560182520c6832f4467da313", + "description": "mesa: remove MAX_3D_TEXTURE_LEVELS, MAX_CUBE_TEXTURE_LEVELS", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0ef61a162a7f1d5b7e405e5d858514d24c64e440", + "description": "mesa: remove code for old (mostly unsupported) GL_NV_point_sprite", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8f11b848c3ec4b7169fcab226efe1d130b997af2", + "description": "mesa: fix glPopAttrib for GL_COORD_REPLACE for r200", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "959380dcb0920e3f26aaddfeab68650077675040" + }, + { + "sha": "fa16e66a3f4bf6f7eaef82c1770239be9dd824da", + "description": "turnip: always set LRZ registers to zero for 3d clear/blit", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "1d83f5ae8435c428a20fa947d6a2b22ae453e80c" + }, + { + "sha": "f24358e002de645388852dd85aee4b9b0d3d7652", + "description": "turnip: 
move up LRZ invalidate in CmdClearAttachments", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "2f79e0066405f80c3244d1cc814bc6a51d1d581a" + }, + { + "sha": "aed7c5aa31f3896137a5cdd60f87c30cee3798b8", + "description": "turnip: do not emit draw states in draw_cs outside of renderpass", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3f58d80823e297ffa4a478d9d600db5ac19227d1", + "description": "turnip: correctly disable draw states outside of renderpasses", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "af6e74bca86823be63907ec91549ff9f8e88f6fb", + "description": "turnip: always emit LRZ draw state in DIRTY_DRAW_STATE path", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "3c07a149986381847a8431db0491286c1fc10897" + }, + { + "sha": "2d886fb43656bf38dfbc3f6c2266b5c421e1837a", + "description": "turnip: do not include compute stage in pipeline_builder", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d7ea266e6f5cdcde5dd880839a057afaa560e97e", + "description": "turnip: no linear_to_srgb for alpha channel for gmem clear value packing", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "ddac5933f8f30df66b389c32a634dab16cff5a4c" + }, + { + "sha": "ab0d6c91fdd96db288da5c0037417b1302399e9e", + "description": "gallium/u_threaded: optimize set_constant_buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4a483ec027e752e1f95ff900bfc5a293ec281134", + "description": "gallium/u_threaded: don't make a local copy of pipe_draw_start_count", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c8fe9df9c462ed0cda0d62c7133f21fc2aa6ea84", + "description": "gallium/u_threaded: don't copy the indexbuf pointer if we overwrite it", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "351ba767afe5db4603a60be33fb78ec41630f502", + "description": "gallium/u_threaded: set has_user_indices = false in the driver thread", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "60653366b8f014edd8cd1bc0b765932f3c2ac408", + "description": "gallium/u_threaded: don't pass index bounds to the driver to decrease overhead", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1b6b31bd1acbd1080e59c94c0ada9b89ef714d75", + "description": "glthread: count batch space in units of uint64_t elements", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4b2445916e6ae6b586bbde53310a411db084e67a", + "description": "glthread: change sizes to unsigned or size_t where needed", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6fe524d0f8cbab3d1e5ce32ef694b17dcd31db1d", + "description": "glthread: use uint64_t to declare the batch buffer instead of align(8)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "70b778945b320115c558e17f12e3d630d464dcba", + "description": "glthread: use glthread->used instead of glthread->next_batch->used", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0a4004e5fab0aef3ccec102656d7abef303ae5d8", + "description": "zink: use shader keys for 
samplemask", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0ce792b2e20daea441f52134b7d1d0d2245b32bd", + "description": "zink: change a memcmp==0 to !memcmp", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b47407269de07fcb6a2d898af8bdf6561132fac9", + "description": "zink: initial shader key implementation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f93b7d14d66d8ba70d44772d1a1b6696310b7d17", + "description": "vulkan/device_select: Store Vulkan vendorID and deviceID as uint32_t", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f4bbf29d7671869569eee0bd949ea5a6d8af2c20", + "description": "gallium/aux: Update scons build for u_tracepoints.[ch]", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7dda840f32a92c0c5004e43acdf765ed650f978f", + "description": "gallium/aux: Split u_tracepoints.[ch] generation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4c670b13dd5a5e9425941bc425d1ba4afcfe3c15", + "description": "gallium/aux: Avoid creating queue when traces not enabled", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d1283083eac00c6466a0f066df732699b7074c6f", + "description": "nir: improve divergence analysis for loads with non-uniform resources", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b1619109ca91f5b9b4f29d75479241ec82c31549", + "description": "nir/lower_non_uniform: remove non_uniform flags after 
lowering", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3aaac40b12bf683cb30ea30e35af02d56de9df90", + "description": "iris: fix memleak for query_buffer_uploader", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ebfb9e181737e7ff7be638134410b919145a0f95", + "description": "aco: use UINT64_C on 64 bit constant arguments", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "df645fa369d12be4d5e0fd9e4f6d4455caf2f4c3" + }, + { + "sha": "454c8485929052c1d997d102ed195610d23f9079", + "description": "mesa/st: lower 64 bit ops to scalar before lowering to soft-float", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "59b1578176a786e3c226c0b2b53fed378812288e", + "description": "radv: disable alphaToOne feature", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "34b08a298ddf34f10af21b6a9b3a528b2c51a82f", + "description": "driconf: add allow_incorrect_primitive_id option", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ebb228bec52ae7c456e11a472845720b85751226", + "description": "vbo/dlist: only use merged primitives when it's ok to do so", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1c016a5ba0818207e050a04a72ca2feb883ad6cb", + "description": "vbo/dlist: avoid splitting draw commands in multiple draws", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "83149e1c2d8578fb25cf95502d67917cef78692f", + "description": "vbo/dlist: refactor prim_store/vertex_store 
allocations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b81ed32ba859202cd4179db456471697abfcb280", + "description": "mesa: optimize _mesa_program_resource_location", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ba67843dbd5965093f7367dbcf42c7f67359c422", + "description": "util/hash_table: add _mesa_hash_data_with_seed function", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "310991415ee1f5bcdd91460f00205190e8e0c2d9", + "description": "vbo/dlist: implement primitive merging", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ac3d4c7635beab92cadada63f8c09fd12a2f7069", + "description": "vbo/dlist: convert LINE_STRIPS to LINES", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dc995adec5ef36dbda43d9dd7f698ff8d6a70f2c", + "description": "vbo/dlist: create an index buffer in compile_vertex_list", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7e296c62a71a172540142a0779722b499add5f3d", + "description": "dlist: do not call _mesa_lookup_list twice", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2b1930a50a0bdf201769c608f3bad71bb3515b6a", + "description": "android: radv: add libcutils shared dependency", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "eeecc21d935c6c8fc783d9f805b5fa153b455df5" + }, + { + "sha": "a0c508993504c7e44999ec8ac654bec4c99ad301", + "description": "mesa/math: Fix address of array always returning true", + "nominated": 
false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "3175b63a0dfa290430f9f7eb651387788933a02b" + }, + { + "sha": "77dafaece337b617e51036284fc6d66552614b68", + "description": "android: util: Add libcutils to Android.mk shared libs", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "eeecc21d935c6c8fc783d9f805b5fa153b455df5" + }, + { + "sha": "e2b4247e403957ebd9767b2e8700442306c7e7c6", + "description": "zink: Cap PIPE_SHADER_CAP_MAX_CONST_BUFFERS to 32", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "daaf5f1d1868bebec7931a51753236a850ebbd24" + }, + { + "sha": "6b6cb44ec811bd8992d1f58e9d6cd6709e3cdef2", + "description": "gallium: fix the PIPE_SHADER_CAP_SUPPORTED_IRS value for all drivers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a33dbba26147b3fad94146f384bbf14feaca2c3e", + "description": "st/mesa: remove less useful debug options in hot paths", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "44b7e1497f91e1ad0c4bb6a95a19e2f1b456897a", + "description": "st/mesa: don't generate TGSI for the draw VS because it now supports NIR too", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "df11ceaaaf74e4715cde076eccadf84f2f8fd00d", + "description": "draw: add NIR support to draw_create_vertex_shader", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8bb4a76addd0f0a48585ba9a4a30d6bbc5acec80", + "description": "zink: fix channel ordering in format-mapping", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "cdfb1d925f388861751ce04a0000141013e1c704" + 
}, + { + "sha": "6c0ce29b04a6045e6db2279c61bc9b1bcaa58126", + "description": "r600/sfn: Add support for shader_clock", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d95791bb2eeb73df81ac3da2514a56b8533cdcd1", + "description": "r600/sfn: Fix dest-swizzle for GS vertex loads", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c5088b497290ba1207085899ec635ef851635267", + "description": "gallium: Fix VAAPI postproc blit", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "49465babdb35d88ed8a283e925d6cd346255d50c" + }, + { + "sha": "c47fe54bc785788147f1f1808d0da720995f5447", + "description": "nir/lower_non_uniform: Better handle non-derefs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0bf8d156a9f0e53e17fe342a9cb528513236ef8f", + "description": "nir/lower_non_uniform: Refactor for better code organization", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "562e07db38499eaa377c0ad88ab789c701ab8927", + "description": "nir/lower_non_uniform: Use nir_read_first_invocation helper.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3abac03d49b5dc4a6642fe0637da5a19ad60dd4f", + "description": "gallium: do not reset buffers for unsupported stages", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "daaf5f1d1868bebec7931a51753236a850ebbd24" + }, + { + "sha": "c4342755cc08c2dc3650bdeafa9c07f51b8b87f1", + "description": "llvmpipe: work around mingw compiler optimization bug", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "ec3828add38a83b8c09fd5896265abc9d766162e", + "description": "radv: fix clearing FMASK for layered MSAA images on GFX9+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "35964e9387532a00f63f11d2ee666ce5112e4417", + "description": "ac/surface: initialize the FMASK slice size for GFX9+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c0319e4505b95c72d08864eda517998cf506fbde", + "description": "radv: advertise VK_EXT_sample_locations on GFX10+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3adf8121a0391711004b52f4b5278e1ae34343c4", + "description": "radv: enable using MSAA2x and MSAA4x sample locations on GFX10+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "86644b84b94910ddb78b7a1117b8bcf55bb76265", + "description": "radv: Implement VK_VALVE_mutable_descriptor_type.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "08fb84dc9d5eef78e9914f3c526f9f49fde1e5dd", + "description": "vulkan: Update to 1.2.164.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2c16c209b59baef328fdcf45e0d6d959f498fcfb", + "description": "android: freedreno/ir3: use python3 in gen rules", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5e07590e6a649e52145ebee8f2a4604c7f21cda2", + "description": "android: freedreno: Remove fd_log()", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "03e7c93b828a585c354f2d167c01cacc6054f8c2" + }, 
+ { + "sha": "cbcac6b2fb5dbe37cb517068c4a0ddd8bea39e37", + "description": "android: freedreno: Add GPU tracepoints", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "a02dcb970fff17e73648b58bf4a603bc4f9c0c36" + }, + { + "sha": "8fc7807cc2efe843caf67c82605fdf1f64eb30ec", + "description": "android: gallium/aux: Add GPU tracepoint mechanism", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "3471af9c6cfa1dfa46e9607910bd4febc31095d2" + }, + { + "sha": "0553e717e41c10f312618f8a49fae224786d1932", + "description": "android: gallium/aux: update old generated sources rules", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "3471af9c6cfa1dfa46e9607910bd4febc31095d2" + }, + { + "sha": "98df055736c6adb916edcb8e6187adef2369787d", + "description": "ci: update the list of expected failures for RADV/FIJI", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "562dd79bfa6c19af871baa0464a5b12f72145d4b", + "description": "radv: fix using FS sample shading if the linker optimized inputs away", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9a993da0ff499f3489f1d57de2c7e3b6da701e7f", + "description": "lavapipe: fix logic-op support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5bcefcc91abe796368adc2f9f23720594c9eef82", + "description": "zink. 
Fixing vkGetPhysicalDeviceProperties2 and vkGetPhysicalDeviceFeatures2 for Vk 1.1 and VK_KHR_get_physical_device_properties2.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "752f6d806541ac38594f3adb1e45d63709506e2c" + }, + { + "sha": "b24b3026cc0541c59613463a124b16954b2a7cd8", + "description": "radv: use 32-bit predication for skipping FCE on GFX10.3+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3494551d081820b1e5835fb46c8e98564ce855f6", + "description": "radv: set the predication boolean as 32-bit if necessary", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fadcf13c8bad5bc07e7fffaca7197adda66b4bd7", + "description": "radv: fix exporting multiviews with NGG", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5cacb56041d1acbbff162509d0635ba465b80df6", + "description": "radv: mark GFX10.3 as a non-conformant Vulkan implementation", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7c075bae56e45f268e62efcd82589111bce92ae3", + "description": "radeonsi: fix regression on gpus using the radeon winsys.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "f2977a162af46ff0b9d9334bb677b768900ba5d3" + }, + { + "sha": "6df572532dce8fa5c09c4774e0f79e039b118a85", + "description": "radeonsi/gfx10: added support for gfx10 conditional rendering", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3bd9db5be3c0e28443098dd0108e01a73c2b83e2", + "description": "r300,r600,radeonsi: inline struct radeon_cmdbuf to remove dereferences", + "nominated": false, + "nomination_type": 
null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "40a7f6d0477c06cf7eef0cee253cc3de300236d5", + "description": "panfrost: Only set varyings and varying_buffers when varying_count > 0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e3794264fff9dc3eaa7b84b344c3f22638d0223a", + "description": "panfrost: Fix draw descriptor definition", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "764db6a223f9f3bd785edf9bf9bf5021835be22c", + "description": "panfrost: Fix texture payload decoding", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "87ad5f5b0df3beb3e971b3e5a5187ce1f4ac828b", + "description": "isl: Fix android build", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "f08d8c849e0df3703800a4471039a0297455951f" + }, + { + "sha": "f20153536087079f39f1ab9995ac3d36dd3c467f", + "description": "ci: Run sanity job automatically for forked branches as well", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "459b3fcd064d1410fc89dfdc84b454cd6b649cca", + "description": "ci: Manual test jobs don't need the Git tree", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9a340814cba0d9de7bfc5813e6e0dab310d081f9", + "description": "ci: sanity job doesn't need the Git tree", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "42bc6db7e71c348594277af58975467c54eb2b19", + "description": "ci: Drop x86_build_old image", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": 
"a3543adc2628461818cfa691a7f547af7bc6f0fb" + }, + { + "sha": "0781d9825b31d55aa350dfe158a314eb663e9c5d", + "description": "ci: Append $MESA_TEMPLATES_COMMIT to image tags", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "48f78dfd1ac10d5d309aff437f81d44d1ed06cd2", + "description": "ci: Define global variable MESA_TEMPLATES_COMMIT for ci-templates commit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a82fd890600e60ae4a50a1f0a2ed9ff6ccd2f954", + "description": "ci: Run git gc before creating Git cache tarball", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bb46a010bbe9f703ff9d48e450dbd6529906b8e9", + "description": "meson: Fix Clang microsoft-enum-value detection.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "3aee462781abc0bfcce207fb64f9199b43a57542" + }, + { + "sha": "ef9362acb81bd8615cd2f9014ca9880ae3d7e738", + "description": "gallium/osmesa: Return cleanly for OSMesaGetDepthBuffer() with no depth.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ddbad3f4eae3e3b2ed41d8c541c01a0fb21a6e90", + "description": "gallium/osmesa: Fix leak of the ST manager/api on library unload.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bc3142734e321aa64fe6946b79cfdbf9a99ed668", + "description": "gallium/osmesa: Fix data race on setting up the ST API.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "26c6f6cfbb6d489cf6f995718e3e959b9fa5b233", + "description": "gallium/osmesa: Remove the broken buffer-reuse scheme.", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c5c1aa7c75c05927017325829cb3f354654d0b73", + "description": "gallium/osmesa: Fix flushing and Y-flipping of the depth buffer.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0223552fa0ac5d2116f8bfdda40b0193176682c9", + "description": "zink: assert all index values in ntv OpAccessChain constructor", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b4ae9e07cdf877006bcc7374a64c0cad2b37e82c", + "description": "xmlconfig: Warn if parsing the engine/app versions fails.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1618bd1bee6c80a3614c980f6649ae5a13bfeff4", + "description": "xmlconfig: Add unit tests for recent bugs in the driconf rewrite.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aacf309c8d8e03aa411b5da1298864afd0ed0eca", + "description": "docs: update calendar and link releases notes for 20.2.4", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7cb8700f764e5948a89359bdd5398251b0ca2f59", + "description": "relnotes: Add sha256sums for 20.2.4", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ec2a055b56c9e2402e750bd59ed4a705a86b601e", + "description": "docs: add release notes for 20.2.4", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c553084bf9888f5139b2be411a1efdd95004aee8", + "description": "aco: remove rollback code when making an instruction vop3", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "349908587f73ee186693af43812536f15e7c6311", + "description": "aco: move update_renames() out of get_reg()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8794f0348a9507dd3696b1b01a26f2c1759bfdc2", + "description": "aco: remove rollback code for blocked fixed definitions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6f7cb47ad853d85f22a6cc0129b89a6c9edfcdef", + "description": "aco: remove rollback code around parallelcopy creation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9177fe83566083d564996d20cf7f30dc3be59a9d", + "description": "aco: simplify get_reg_impl()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5c9d2ed78d61c63532e9fbeeca30393d65113bc2", + "description": "aco: use clear() helper instead of writing reg file directly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d671cf7f53f3ef8ab4f158776740ecdf790288ea", + "description": "aco: repeat get_reg_create_vector() with increased register demand if fail", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ebd8ab17578d0e823ec96d921e4052fdc1820ff0", + "description": "aco: remove rollback code in get_reg_create_vector()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ad26eae5441bc6f246f095661c4af45f29898dc7", + "description": "aco: don't fill killed operands in update_renames()", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "67860b99ce57e6df8e7e150f3c2eb13dc0754574", + "description": "aco: clear operands in update_renames()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f173bf1be963a5df4379afd236fd843dd784733f", + "description": "util: Do not insert uninitialized data if Android property is not set", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "eeecc21d935c6c8fc783d9f805b5fa153b455df5" + }, + { + "sha": "72b68bd2a62d193e26bd6b31123182d7dbae3098", + "description": "ci: add testing for VC4 drivers (Raspberry Pi 3)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f66236617cd52a7c0d5e27f3941c2e7390def2f2", + "description": "clover/core: Fix x86 build", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "f88347cd223c34324816e917e02aeaa31c6a58b5" + }, + { + "sha": "e476c1819634c839c31228b616f53ea2baaa61dd", + "description": ".mailmap: add and update aliases for Danylo Piliaiev", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "76106301248b94b995a7600aa9b99360ce4e91f7", + "description": "aco: coalesce constant copies", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f53d4e5f6087b5a2d09d4513332919592e1c8242", + "description": "aco: use v_lshrrev_b64 for 64-bit VGPR copies on GFX10+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8c02a8e2d207a1d085f7a8fb3a0117b50b769611", + "description": "aco: add get_const/is_constant_representable helpers", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b10de4c1d8fc495b042632d8122b957d5ca09af0", + "description": "aco: allow 64-bit literals if they can be sign/zero-extended from 32-bit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "24ee0f55f2d99c9c8d611b36016a6f42aa5188cc", + "description": "aco: remove sign-extension in constantValue64()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8451911156cebf73d446bde5422566d650c6fa69", + "description": "aco: test self-intersecting copies when src=higher", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2c40846ab6f8bf161dc79ed99b51702055c6d538", + "description": "aco: don't assume src=lower when splitting self-intersecting copies", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "09c584caeb2a1e7446ac2016ce7a7d8f0586774b" + }, + { + "sha": "741921e2ffa877500c8fb4045702dfb4dae4f16e", + "description": "panfrost: Update the resource layout before calling util_copy_rect()", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "d4f662a25227a3d469a516ba0acb6dc4d1a71bcb" + }, + { + "sha": "90515f90c8523bbf637a4dfdbce959adc45b4834", + "description": "iris: store copy of the border color in the border color hash table", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "809a81ec3a0b8666ec426a88b86fb133ae5f1bcb" + }, + { + "sha": "055aff2613a2913e499ebb82ad61cc2bb343955f", + "description": "radv: reduce maxTransformFeedbackBufferDataSize to 512", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"da1642569071144a4f2d80b76cb4d725aa94621a", + "description": "util: Add os_get_page_size support for macOS.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ba42de95da4312230b16c25a0ce1e75d8839f9c1", + "description": "radeon/vcn: support hevc SAO enc for VCN2+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c56a1898d73df6c1ad816e76a71c52c782279c88", + "description": "radeon/vcn: use cdw to calculate slice header index", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e37cd34ea6b2b58e584b32d0ebade67d8abd5c33", + "description": "radeon/vcn: add 0x02 to enc emulation prevention", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cf7bf7fadeb76a084235dcfaca8c96f569909aed", + "description": "amd/addrlib: Initialize Lib members in constructors.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "df0157dc697b858a3fa939dc6a938dd58c976bd2", + "description": "meson: Make the glvnd vendor name configurable", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9a3aaffeb809e2bc11b828ee3ccbe66a01e9b8d6", + "description": "radv: Don't invalidate the SCACHE for image barriers.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6d7518600838d4ad7ed3579e2cf979b06feb30a4", + "description": "st/mesa: move cso_context next to the other pointers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a07cb9a1ef70f67675ede78ee1824dde80204ed2", + 
"description": "st/mesa: put pipe_screen * into st_context and use it", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d23f45577eec88deab8e2083749ad0b7412c49d7", + "description": "cso: inline struct cso_cache to remove dereferences", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8904fcca6dbe3059b73a90d99b9105bfa0661e12", + "description": "gallium: inline struct u_suballocator to remove dereferences", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0b22def5f84c0c2c83daaf00710ae7c50ceee9d1", + "description": "cso: remove unused code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d0bc10affb3845e30d15bbd82c2bfa2469770401", + "description": "vbo: remove gl_context dereferences when we can just subtract the pointer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "03e7c93b828a585c354f2d167c01cacc6054f8c2", + "description": "freedreno: Remove fd_log()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d5bc39c9d0e2a22d9cf968023fb39b296ffdcf5d", + "description": "freedreno: Add trace-parser.py", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a02dcb970fff17e73648b58bf4a603bc4f9c0c36", + "description": "freedreno: Add GPU tracepoints", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7a70f28de2b547f393b6a10079a0123ad335f0ba", + "description": "freedreno: Don't emit log/trace points in gmem for nondraw", + "nominated": 
false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "552dbd4470df82639532b4d6a6ff661081af8dc5", + "description": "freedreno: Remove unused fxn", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c0f2897a7b3976a67509cef6a44c80b17579fa20", + "description": "freedreno: Small log-parser.py cleanup", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3471af9c6cfa1dfa46e9607910bd4febc31095d2", + "description": "gallium/aux: Add GPU tracepoint mechanism", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a1440ec3daaa4f95b5ce007157ae1371c39da5b6", + "description": "util: Add helper to get FILE* options", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1e4cd12c8bf5c7ecf829f203b0162f7268da03cc", + "description": "asm: Try to fix sparc assembly for inverse matrix operations", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "3175b63a0dfa290430f9f7eb651387788933a02b" + }, + { + "sha": "70762b826b6d2781de9e06f1dad5d5e741876704", + "description": "asm: Fix x86 assembly for inverse matrix operations", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "3175b63a0dfa290430f9f7eb651387788933a02b" + }, + { + "sha": "94c6511757254da6c3b6113c6423bbe27c7ee77c", + "description": "docs: add release schedule for 20.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e1f47001ed068cd7991921b4d28de2a4b8784f7e", + "description": "docs: update calendar and link releases notes for 20.3.0", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9681b5b7551414e4feacfd653c8b31a7683a8076", + "description": "docs: Add sha256 sums for 20.3.0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "af74c35d7d2e513b9da6981f672d3a48ac1b7cb1", + "description": "docs: add release notes for 20.3.0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "872c4bcd27db7b7ca26abe9fc090ae26d502156f", + "description": "turnip: implement z-scaling and z-mirroring BlitImage", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c76edc646b35f7dabef101897fd6b37e5488cee5", + "description": "meson.build: Order the flex/bison by odds of them working", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4c285e70806aa036d73705dd8b6ae46d1176130a", + "description": "meson.build: Disable zlib as per -Dzlib option", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "12fa2d2ac22ef3ddbc8c5efaf55064fe1a19fd29", + "description": "meson.build: Use SSE math for MinGW X86 build as per sse2 option", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "81702c0ba6bc588d50c11aeb5975eca57bc18848", + "description": "meson: Don't build svgadrm on windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8955980f17f902d24c50962502a20285dcd11642", + "description": "gallium/targets/libgl-gdi: prefer d3d12 driver", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "843a56324a388ca994f6f2150e8f0235c4830b1b", + "description": "lavapipe: set some basic usage-flags", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "870724d43ba0f2784bb9bb575cb114ca43f00b25", + "description": "nir/opt_sink: use common instruction removal/insertion helpers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "45e43445ce724c4d7f15457ac30fc23fd531bc80", + "description": "gallium/u_threaded: disable forced staging upload at runtime", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2900f82e1919dcb70d29f34e5ed10a09f7356b3e", + "description": "gallium/u_threaded: fix staging and non-staging conflicts", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a5e0a2e101bcda0132185a82c3e8c9b4c90ce94c", + "description": "Revert \"Revert \"radeonsi: use staging buffer uploads for most VRAM buffers\"\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "af0435cbfe61632407ce135fbea9cab6fe1e4fb6" + }, + { + "sha": "128b2de63b415e96225a2077846d64bd54edbcff", + "description": "nir: gather if a fragment shader uses sample shading", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c0aa3c83232d21bcd2a4d4d4a34b894e0870f403", + "description": "glx: Claim to support more GL versions in __glX_send_client_info", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f39fd3dce72eaef59ab39a23b75030ef9efc2a40", + "description": "glx/indirect: Validate the context version in CreateContextAttribs", + "nominated": 
false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6a265420dd585a81fa5c8060122201ac6befcbb9", + "description": "lavapipe: add support for VK_KHR_indirect_draw_count", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f65b1d22f21fedf51d4cdb5480aa6afc723a6da4", + "description": "lavapipe: enable post depth coverage", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0a6e1971eadad79eb0b592ea795cfafc7c355a56", + "description": "lavapipe: enable VK_EXT_shader_stencil_export", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d3024817514fc0865a61a85c4e95e063450e6433", + "description": "CI: Add llvmpipe- prefix to Piglit jobs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2609fe34522b31106f52adcc0e3de6910d3c47b5", + "description": "CI: Reorder non-hardware stages last", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "220a467b7fa5fb22d15d646f45bc56debe73c0ed", + "description": "CI: Collapse lima & panfrost stages into one", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "567332274579d5c1fe588dcb8759e0781ef2da1c", + "description": "CI: Collapse virgl & d3d12 stages into one", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "343705a9cfa11c4bcdf5fad167c03e5328e1a277", + "description": "CI: Collapse radv & radeonsi stages into one", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, 
+ { + "sha": "d5bf4c99f60e285a0499ab64b9fe5e2b1405301b", + "description": "CI: Collapse llvmpipe & softpipe stages into one", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4d7c84805362adf96a73cc7c630714e920345067", + "description": "CI: Collapse SCons & meson-misc stages into one", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "18f6bd676d1bd6f37c88a55434339080735e8ad7", + "description": "util: unbreak on BSDs after MSVC changes", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "cdf3a6a83b5013dad78a3d95817cd772a146ca40" + }, + { + "sha": "d3c67d7e7ec6b9cf10fbea0d08e92751b7b0fbae", + "description": "freedreno: Break out of \"should we free the entry\" loop once we've freed.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "6de01faac5a20208422fb75d22f2bd88c53f53d8" + }, + { + "sha": "daaf5f1d1868bebec7931a51753236a850ebbd24", + "description": "gallium: Fix leak of currently bound UBOs at CSO context destruction.", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d90107a2002891ae0f5e9b1ac9ecd8789c9e1969", + "description": "gallivm: Fix max const buffer count.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "1d35f77228ad540a551a8e09e062b764a6e31f5e" + }, + { + "sha": "634384e4a09d897e0f045e6e99b787804ef4fe40", + "description": "gallium: Fix leak of bound SSBOs at CSO context destruction.", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "311470fa64423f6ea2cadabf18c90aee07339aa0", + "description": "meson: Remove old todo comment about pthread stubs.", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "661b29283425c568d1fa403be703775e02c50d08", + "description": "egl: Skip closing drivers when building with AddressSanitizer.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9cc8fc7bbc4a4d7fe4d3635be207daf2a6672662", + "description": "freedreno: Fix leak of u_transfer_helper.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "d1465b3aeee318a3a76a652325aa13d4a1946f37" + }, + { + "sha": "0626e3a950d5dbe5d86244670b54b67a2af6014c", + "description": "gallium: Fix leak of the merged driconf options.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "8a05d6ffc65d0fd0e0a52fe84a174d4ca63e5521" + }, + { + "sha": "06f2516696f7fe14d80324e6fef44c793ba75864", + "description": "freedreno/afuc: Fix up some sprintf format security warnings.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9cb6e693c911b1119efe8dd727b10bdeab908779", + "description": "egl/dri2: Drop some pointless ifdeffery", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d49e66c3ca8de374960fa4e2b14d665834b706f7", + "description": "egl/surfaceless: glthread support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7a57acad8757a9ffb0d8fac9865b6a02ef50de6d", + "description": "util: Disable [[fallthrough]] for C17", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f2c4a47642cd389fffb18d4877b01856e41d6dc", + "description": "ci: Go back to previous ci-templates commit for debian.yml", + "nominated": 
false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "59a6705ccef1f8a7b1e7f6486e33603c6d6b7606", + "description": "zink: do not require VK_KHR_external_memory", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "f1432fd3e2d915f09a295f1ffb878471ce4e4d73" + }, + { + "sha": "b1224143aa0f8daf0e62d44a8f2fde8080c654cf", + "description": "clover: Use .def files for exports on Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "474baa04eda3b2bba3bf10c121ae98a7137dc128", + "description": "clover/api: Support MSVC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f88347cd223c34324816e917e02aeaa31c6a58b5", + "description": "clover/core: Support MSVC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4cfd57dd489bab37ed77bddcfe4bbf55051f5362", + "description": "clover/llvm: Work around MSVC quirks", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "149a036825ba74f9edd2468430f99ec0e927b845", + "description": "clover: Fix property_element::as for MSVC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "80817b6e344258ac9b955f824ebf9019a0fc1610", + "description": "meson: Adjust Clover's required LLVM modules", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "959e0177990356d450da87eda0e7f99039656960", + "description": "clover: Add version.lib dependency for Clang on Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": 
null + }, + { + "sha": "72566fd92c27b39abe2057f6f23388ec40793dd9", + "description": "clover: Support LLVM coming from CMake instead of config-tool", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "425cfcafb20ab91669e2a080f4b095111d62969f", + "description": "clover: Add opencl-native build flag", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "60454a4e990e2469846cfa07cf495a4bf5751207", + "description": "gallium: Remove unnecessary forward declaration of swrast_driver_descriptor", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bb7eee8b406b22d1b5107b9a264249116657b663", + "description": "gallium: Include winsock lib as a dependency for Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a5de2bc2296254340a09a0f83a98ed98d2786e38", + "description": "driconf: Avoid empty macro resulting in empty initializer braces", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cdf3a6a83b5013dad78a3d95817cd772a146ca40", + "description": "util: Add os_get_page_size query", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "852d91edcd48f8e923124a848abd20cabedeede4", + "description": "windows: Always set NOMINMAX to remove min/max macros", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0a7abee60b699a6065edbe778cf990dd8d4910bb", + "description": "anv: Avoid a couple of warnings related to vk_error macros", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null 
+ }, + { + "sha": "e60fcf0a8721efc4973910cde4733f4b3b54cb6f", + "description": "nir/opt_sink: return early when trying to sink unused instructions", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "5f6c5e5b86f366c6fe0a0911fdc1926f1e0d1d5f" + }, + { + "sha": "5603bb13e36276ddb1bf1a5c41f00988b368110e", + "description": "v3dv: fix early return from failed drmGetMagic", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "b14679ab226bd3e263a778ea8e39828bde38b403" + }, + { + "sha": "9eee4054848c0d2f51818c549354cc156ca22487", + "description": "freedreno: Add missing dependency to build", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "1e8808a4a0f7f28dfc885bbe6c50e7a65ad15bbf" + }, + { + "sha": "770230aab110478330a54aad6237d40db6028e90", + "description": "glx: Fix the generated error when indirect contexts are not supported", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "97858f3c9c15882da5c2cbd4d8a2dbc63d1d0b1a", + "description": "glx: Simplify error handling in glXImportContextEXT", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2c9e7f73ad9514e3b602a365e74edaec64f09ece", + "description": "microsoft/clc: increase test-timeout", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "ff05da7f8dc4aa531704d48f718514e3b1fff45d" + }, + { + "sha": "a7fb25bfe418ed900c246ec8007e595a071881c9", + "description": "egl: fix typo in wl_drm error message", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3f4325e44b2a7eda0012577291ef63417fa044c6", + "description": "etnaviv: update fallthrough comments", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3517b6fd53aca309c720ed384b24792bfe900c0a", + "description": "microsoft/clc: add missing dependency", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "ff05da7f8dc4aa531704d48f718514e3b1fff45d" + }, + { + "sha": "84dd4ac1c75675d16b1af66c79df6078ac33aea7", + "description": "microsoft/clc: use files-function for source-list", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "19a9f225011db783773641640ac5a3c94f9eb227", + "description": "zink: moved vkEnumerateInstanceVersion to create_instance", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "195a001d7366a23c569c38437390edc5ad1c25e9", + "description": "gitlab-ci: do not clone git-repo for test-job", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "d560addc30eb68d87b7d9a52c0857e4e31acd7e4" + }, + { + "sha": "361d143f941c34aead2bbad8541aa1eca54149ad", + "description": "meson: Add build option to specify default shader disk cache max-size", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0546ceba16df527d8fd15c23b6a605ec11c71907", + "description": "radv: Use portable ffs and util_bitcount macros", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3ff1eccce6e0cc18a9d2e6d9919e32274158ec2d", + "description": "radv: Replace pthread thread with thrd_t", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "63dc2a53afee76934c131ee0faa935530a8aa75c", + "description": "radv: Replace pthread mutex with mtx_t", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aefaceab09eba4d19c0c5c2120209b23d82043e9", + "description": "radv: Use unsigned with u_bit_scan for MSVC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "60c362c4906afa7968eb1862c25d4fa44cc5355a", + "description": "radv: Create shader cache if ENABLE_SHADER_CACHE", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bed6e462e18869ee44810a30aab1b053a2e3c926", + "description": "radv: Use standard __VA_ARGS__ macro", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "09fb370068921331fee6042a8a81c381383af6e4", + "description": "radv: Fix function parameter types", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6ec0953e22cfea4e4869e45b784a13f44b65c275", + "description": "radv: Fix leak in radv_amdgpu_winsys_destroy()", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "fa97061a8235b64009d7897ecf20cc81258f3403" + }, + { + "sha": "314bcb393762d5bdb572e70ffce9ad1b50c57679", + "description": "radv: Fix callback signatures", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "be42e9a6d16dbc903739b31aae5133223b23dac4", + "description": "radv: Update radv_assert for MSVC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "02481e1203847fa242d24e1ff2ae5a17bbad95bd", + "description": "radv: Ignore radv_printflike on Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "7f7586704c364e26cfceea28edbb89fb8c26ea03", + "description": "radv: Don't return value in void function", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3fe62252e4ddbad519d0ce50e9dd46173e413857", + "description": "radv: Use os_localtime instead of localtime_r", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fe67fe688a015e137f1ae3c6e36c5f14c2540253", + "description": "radv: Wrap pragmas with __GNUC__ to fix MSVC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c4a516ca7021ff99a569c2ebff28f699fa30faff", + "description": "radv: Replace VLAs with alloca", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "274925836828c1b7d2a34d29a81f7ab537211e9f", + "description": "radv: Update build defines for Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0c70842232ac9a302db285287eb20ffd3be71093", + "description": "radv: Exclude amdgpu driver files for Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b79eac6b0736bf02788b87d420dc9022e1808751", + "description": "gitlab-ci: ignore nv_copy_depth_to_color", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d560addc30eb68d87b7d9a52c0857e4e31acd7e4", + "description": "gitlab-ci: run piglit on windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "76b51fdf0c4205e78a94eefb359b2abec061dd2f", + "description": "gitlab-ci: build piglit 
in mesa_deps.ps1", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "04e3693e6d64d2d067c869d7ed697b25bb8d3a35", + "description": "gitlab-ci: build zlib statically on windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eaab92cc61f910c09d2c4df8297f2b19edb06309", + "description": "gitlab-ci: store build-artifacts from building mesa", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e88c61eb41929ab0fbb98cc443f5f4f0680cf8a1", + "description": "ci: build the Vulkan device select layer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "06eef592ea77c94fb669543041175f8690e2ff6a", + "description": "vulkan: add missing src_inc to the device select layer", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "f86668f487b32c185388a39e2200c17c298b877a" + }, + { + "sha": "a1976e1cb2d817a4d5229994b83c87d82485af9b", + "description": "intel/fs: Implement nir_jump_halt", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6992d2f625baa460c81a00df2af062851c8c43a7", + "description": "intel/fs: Emit HALT_TARGET in emit_nir_code()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4a7f0aa2e034c7f82cb143367efadb0e3eeca08e", + "description": "intel/fs: Remove unnecessary HALT_TARGET in opt_redundant_halt()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f9d549b2bf94434efb7ef3ff390dd887adb9c9a4", + "description": "intel/fs: Use BRW_OPCODE_HALT for discards", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e76e3590072a60a20115cf69ca0b3aa442269f99", + "description": "intel/fs: Rename PLACEHOLDER_HALT to HALT_TARGET", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f3ad9281901dd0222289f2d5df52814b29001faa", + "description": "Revert \"radeonsi: always return void from si_build_wrapper_function\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "0aba1743611109545bba86bf055a04ecbe210132" + }, + { + "sha": "86675a07f811280974e834c5164de60a315c8114", + "description": "radeonsi: don't check for GS fast launch for NOT_EOP in the indexed case", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c7470c1760cb4f326ccf7483667185c0dcf98175", + "description": "radeonsi: don't set DrawID and StartInstance if they are unused", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c4ddf67ee17aa5bdbf8cdf0e5f9eb6c5ad85e28e", + "description": "radeonsi: don't invalidate emitted NUM_INSTANCES for u_blitter", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "69c927debeb705ec46b50d8e5ddfacb81264b244", + "description": "radeonsi: disable WGP mode on gfx10.3 to prevent hangs", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "9538b9a68ed9aa0f8a231d6bf681f6f0a2a9d341" + }, + { + "sha": "8ae3ad95ef3021d50be7778d3a879cd852980648", + "description": "ac: enable late allocation on VanGogh to increase perf", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"623ea8153018e7874bb133921c5da7d9ce546a08", + "description": "radeonsi: don't update provoking vertex and outprim states in SGPR if unused", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4641dca269cb72fd9f2bd6d078f7055c855d9ad9", + "description": "radeonsi: don't update indexed flag in SGPR if it's unused", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d7ee265a956d17396b716a261434f21f35f48688", + "description": "ac,radeonsi: fix load_first_vertex", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "80a0f8aba384c7e3b64492740898ef8a5b575db0", + "description": "radeonsi: only mask 1 CU for GS/VS waves on gfx10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0d4f1dcd155be753ff49cd763be916cf701cda25", + "description": "radeonsi: fix a nasty bug in si_pm4.c", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "da78d50bc87ef5db846a942664094b6299cd1888" + }, + { + "sha": "bbad432e966ea8a8d17af3075ff115415dede9f9", + "description": "radeonsi: eliminate shader code for disabled or masked color outputs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "509142876b8b63d55c97a99525d1251d9f3988ed", + "description": "radeonsi: add AMD_DEBUG=nofastlaunch for debugging", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "de799b2270f5342c2c108488c2c694412b06c945", + "description": "radeonsi: enable NGG and NGG culling on gfx10.3 APUs by default", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "22917baa75c8b1f56b3d79490080a96909594753", + "description": "radeonsi: unduplicate code setting MIN_COMPRESSED_BLOCK_SIZE", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2c61411f253b7211607ebf817d548dd8cacb6ff6", + "description": "winsys/amdgpu: don't use debug_get_option_noop in a hot path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7cabd8e333d1ba5d8af915d9384d5cc37c3fdf5b", + "description": "winsys/radeon: don't use debug_get_option_noop in a hot path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "530c276c4c3d11a364bb82a4e08cc87fc1308989", + "description": "radeonsi: fix max_lds_size warning in release builds", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9d210312650913db29164d8aceb497a53931a3d8", + "description": "radeonsi: fix line stippling with LINES_ADJACENCY without GS", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e64d5cc4d6c0b950883be0882f9b7ad3401965c1", + "description": "radeonsi: fix a memory leak in si_create_dcc_retile_cs", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "1f21396431a03dc4e5a542628d7d8370973c967f" + }, + { + "sha": "a287ab2020f0b657d3cebfe5676fcacbe17e585c", + "description": "radeonsi: use util_logbase2 instead of division by index_size", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5525551d034a0f1f1913c28a9be695e06b77990d", + "description": "radeonsi: correct the MAD/FMA support table", + "nominated": false, + "nomination_type": 
null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f50dea2188c7d777249bcec8c3a70aed50ce7b4", + "description": "radeonsi: always use a staging texture for linear 1D textures in VRAM", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ebcca77d4badfb3b1eb4e8753a3b553126ff109d", + "description": "radeonsi: print more fields in si_dump_shader_key", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "071fd55381598bac5fc6b98a9632c697d8df8939", + "description": "intel/compiler: Add GEN125 to enum gen", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cd3251d6bafaae13d5a2cf52c917ef4517fe0130", + "description": "intel/iris: Build gen 12.5", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3b953f0f7a672389b4a6a591efb94f5e1eca7c25", + "description": "intel/anv: Build gen 12.5", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f08d8c849e0df3703800a4471039a0297455951f", + "description": "intel/isl: Build gen 12.5", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "032be8c4d430501d946f881870188885b918b510", + "description": "intel/genxml: Build gen 12.5", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "569afd37f15cfc51750a71fd695f4496b0358fbb", + "description": "intel/genxml: Copy gen12.xml to gen125.xml", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "99fb15cd31472eb73befd8769134505e1f4b499c", + 
"description": "intel/dev: Add gen_device_info_is_12hp", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b257795b10f9e1c0b87f34b8f363a75e4b1d51bb", + "description": "intel/dev: Use GEN_GEN if defined for gen_device_info_is_9lp", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "94a9867b0569c6f6d65b27f69b678f7f1785f3ad", + "description": "glx: Remove DRI1", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "63822802ef3de4d7a10b96e86e3de4a4f07b77c7", + "description": "glx: Handle create_context in terms of create_context_attribs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c4ed0e8f3fb63d753e50576d7402c52cb30b0283", + "description": "glx: Check share ctx compatibility in ::create_context_attribs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "731f3c113ee22729e9c98d8e049d824b2c153261", + "description": "glx: Remove unused __GLXDRIscreen::createContext", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "97ae4297541e52af9f47cdc1e4f65ee06ecb78ba", + "description": "glx: Eliminate some stub functions for !GLX_DIRECT_RENDERING", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "893a30eacc68b7add6430e3fa3ef59ef5ca306f1", + "description": "docs: inline contents.rst into index.rst", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e3047762c915032e161bcba612a1436ffae32a21", + "description": "docs: mention egl in api-list", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7f155a6da30a834269435356142c619ec8b90a5a", + "description": "docs: do not explicitly call out es-versions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7b54772d53abfdb619343035815526ff13061e33", + "description": "docs: use external link-references", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f9e27cc1afeb29a52dcdb7d9fd65a7c335474bb", + "description": "docs: move major versions history out of front-page", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "60fa79b00908e3da2ef3889b4946a87483aff498", + "description": "docs: break project history out of front-page", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8fca21d3e70a8f0b09021fddd7db00f8faeae68d", + "description": "zink: revert to old load_ubo implementation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8366d23e7b342ed3d3f8251e6d970cc13d70e99f", + "description": "zink: add alternate ubo loader in ntv", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0471f83b07ad304cc79c60e1e4ddd6ecebb6784c", + "description": "util/slab: do not dereference NULL-pointer", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "e317103753935afa1d1e1486392fc45e3b9e1fa0" + }, + { + "sha": "4c180c262bbb57f22acc9000ea09730debb12325", + "description": "radeon/vcn: hevc main10 profile decoding pitch fix", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c1c1bad582fc780c69d7413c4c2cdb0c7fe5227d", + "description": "d3d12: Fix GCC warnings for missing function prototypes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "30e5abe5f51088f696fae79a69363ac3e78e3e3a", + "description": "microsoft/compiler: Add missing 'return' to switch case", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "b9c61379ab4c5065d624fb9403c1df9d5589b313" + }, + { + "sha": "423363803e53ed0f6379f185d2644465f02e2c3c", + "description": "d3d12: Fix use of incorrect clear color variable", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "2ea15cd661c8355e8e35624eba0bf10cbcd57f61" + }, + { + "sha": "e593329b0c9fc1177e8d54ea15693a5a4368b802", + "description": "microsoft/compiler: Misc fixes caught by GCC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "456620a6ad244a22941c7d5e728e84350816e400", + "description": "d3d12: Misc fixes caught by GCC warnings / code inspection", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4e481a1f65571be8d09d2cddea18119ae5c4ce35", + "description": "microsoft/compiler: Fix unhandled switch case warnings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eebb04fca4f6de3848a76e5324b2701818027eee", + "description": "d3d12: Fix unhandled switch case warnings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "80dcd63f6469965d4b4b42b508fa6228005c4ca2", + "description": "d3d12: Clean up d3d12_compiler.h", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "244a41d6a8183e8d2d8bca5fa3a6ba8e3c80cdad", + "description": "d3d12: Remove Windows-specific macros", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4358af87e77befde471341fb8f8549828fec6a30", + "description": "d3d12: Fix signed-unsigned comparison warnings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6060291c0e386118b8ae115189d0b0da3bdd4a54", + "description": "d3d12: Fix brace-initialization issues", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c3bf781cb04cb45b30ce56e14a81266c6790b120", + "description": "microsoft/compiler: Remove dead code/variables", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "af6983885bd015ff6961a441e1c87153058bc49a", + "description": "d3d12: Delete unused local variables", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "89f6b72f19dbc503386643c6283047bdb1013bef", + "description": "gallium/tests: fix unused-but-set-variable warning", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "34e5b46fff2124d13b0d39269cdb88176a737340", + "description": "softpipe: fix maybe-uninitialized warning", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5a702fa4e5c4aff130088365763513135cd0188b", + "description": "wgl: fix maybe-uninitialized warning", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"def15ad9ab57602b44268be88dd2cc10f02592dd", + "description": "glsl: fix redefinition warning on win32", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "07a68d27b36bd0407a53a897fa55aa48410b13e7", + "description": "glapi: fix unused-function warning", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "89b01cc02522d50843710242934daaab6687af97", + "description": "util: fix mingw format-extra-args warning", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5f6c5e5b86f366c6fe0a0911fdc1926f1e0d1d5f", + "description": "nir: don't sink instructions into loops", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b02e15d1a38b2454d7dba3e05aeb68dfbbddf007", + "description": "d3d12: do not inspect NULL samplers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "94f8cb29ee7c615e74b4a85e14ac9e22c5dd7f2a", + "description": "aco: Fix NGG GS assert failure from the WG scan.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "62b5012ec35358a7c109746bfe1da33ca1b04f24" + }, + { + "sha": "c2974a1d45c7e922bb36571cf6873f368dc639a6", + "description": "intel/tools/aubinator_error_decode: allow 0 arguments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7a7010d4f5935ca31dc3aee285bb95ecfd6bbef4", + "description": "intel/tools/aubinator_error_decode: allow \"-\" as an input file", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8ffa45a775a6457dae85bd48bd3d424500fc55a9", + 
"description": "intel/tools/aubinator_error_decode: exit with an error on unknown option", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f9e4c0ada15b28118d65d119c508a7dc42da038f", + "description": "d3d12: lower bitfield_extract to shifts", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3a858ecd406f27724c17d3fd409a558cb1cc4917", + "description": "Revert \"radv/llvm,aco: always split typed vertex buffer loads on GFX6 and GFX10+\"", + "nominated": false, + "nomination_type": 2, + "resolution": 2, + "master_sha": null, + "because_sha": "6fb4babfe98e222e648eb2ab210136b7540bf039" + }, + { + "sha": "4c58c68ea6099be9313a0be08b21cbcad893951b", + "description": "ci: disable check-commits", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "37226269d375d3ec4d48a20c019ea9b4667f79a3", + "description": "etnaviv/drm: add some locking asserts", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "034dd948dfd88d0686796e33563b99735127564b", + "description": "etnaviv/drm: convert to simple_mtx", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aad0c7c6b8e73141f5dd5aed0c091efd5dba5b99", + "description": "etnaviv/drm: fix evil-twin etna_drm_table_lock", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4f7d4871a6f85ef51c3660bf673426a820c4e1c6", + "description": "v3dv: don't log out of pool memory errors for internal driver pools", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9adbaeff8526bd8e895ff083d2494a4f4eac0783", + 
"description": "v3dv: move error string definition to debug path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4679a3855f435679c2d95a5db18ae3faee1e6648", + "description": "mesa: replace ParameterValueOffset[i] with Parameters[i].ValueOffset", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1755a5a38480264a4e25070209ce352b744183b5", + "description": "compiler: decrease STATE_LENGTH from 5 to 4", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fdd3a448aeba8dc79f6d758d0219962daa55772b", + "description": "st/mesa: fix uninitialized/random clip plane state vars in lower_ucp", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "584f27326c1560cc23538cb76fb734ab492ccc3c" + }, + { + "sha": "f097c8773effcc2a1b940371c636ffa8c1cfd4ea", + "description": "mesa: allocate the attribute stack on demand", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c0456a65658b9e5dea0e6f1fb48ae4bf0add23ea", + "description": "mesa: remove gl_texture_object references from glPush/PopAttrib stack", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "959380dcb0920e3f26aaddfeab68650077675040", + "description": "mesa: more optimizations in glPopAttrib (colormask, drawbuffers, coord replace)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6f6d3c0e7aad35ddf2100aedd2e39b71e42cd5c", + "description": "mesa: optimize out no-op calls in glPopAttrib", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, 
+ { + "sha": "27b981352a8e89fc1c4ae2f0cbb64f1ff02bf6a7", + "description": "mesa: skip _mesa_set_enable in glPopAttrib if there are no changes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b4bf8725856d571121e0a0fe644ec7b1877adacd", + "description": "mesa: reduce the size of gl_texture_attrib_node::Texture by about 90%", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d0e18550e26209a01465c85382c86a2c9246db0c", + "description": "mesa: optimize saving/restoring bound textures for glPush/PopAttrib", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9a8b54285d24e84facc6f7aefe486a568b6e80c4", + "description": "mesa: reorganize gl_texture and sampler structures for glPush/PopAttrib", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7fa9d9d06c44e9cf3d39b2ce5126bf1b0abff586", + "description": "mesa: add a fast path for restoring light attributes in glPopAttrib", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "734af6135334a91bf7615b2990bb85af3db62e67", + "description": "mesa: add a fast path for restoring fixed-func tex state in glPopAttrib", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3be42f9ca19d593d374d309f47ebd80abb001a24", + "description": "mesa: rewrite glPushAttrib/glPopAttrib to get rid of malloc", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e08b3b74e7f1d8fa22e1797ef4654d9ed6ff0e3a", + "description": "mesa: treat glPopMatrix as a no-op state change if it doesn't change the matrix", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0d7347105fce6cdbd2abbc827c7661a2c063c40e", + "description": "mesa: memset matrices at initialization to enable memcpy on it", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "019f13160131212a1c03f0015f9e8ff9c7282182", + "description": "mesa: canonicalize matrix in glPushMatrix to make glPopMatrix possibly a no-op", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f5d17070bec94642be848aa40a471cdae8f05ac3", + "description": "mesa: consider glPushMatrix a no-op change from the driver perspective", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b1982fd3d2bd12dbc1c85b61629d0c2a53606551", + "description": "mesa: skip glMultMatrix if the matrix is identity", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7b50332aef469273a4cc73f72cc5166412a4cb96", + "description": "mesa: fix crashes in the no_error case of invalid glUniform calls", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f94c190581ef71145d74e6342b5a9c5473df202a", + "description": "st/mesa: replace st_context::state::constants with a mask", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "96fc1ab6476ce8cbd5dc739f0d723f286d0a628a", + "description": "st/mesa: add a faster path for uploading state parameters into constant buffers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9b1f091bd924a4fba9e201ce8e0f98a91e9a8f1d", + 
"description": "gallium: add PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b8423c2eee0e4d93502640343107f0e91fcbca8f", + "description": "mesa: add helpers for drivers to load state parameters into buffers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7fe4a830fb02e265885402090085affab9eaf665", + "description": "mesa: merge light state parameters for faster uploads (disabled)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0d6d66d516347d1749cfecb30792e110a0853ac8", + "description": "mesa: merge matrix state parameters for faster uploads (disabled)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "63f7d7dd0a843254ffa51a41e2b90d5ab4dc45d7", + "description": "mesa: take advantage of sorted parameters in _mesa_load_state_parameters", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b9bff76b630b1655a934db6e5f3e6a59a5b6cde7", + "description": "mesa: put constants before state vars for ARB programs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "06a141469b38e6fd2fb2f870629e2e10d8a7ba00", + "description": "mesa: put constants before state vars for ffvp", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8cb0229f26a123015147c92a165a8d083e1ab1dd", + "description": "mesa: restructure gl_light vars to match the layout of gl_LightSource uniforms", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, 
+ { + "sha": "51ce2f6c8d0f28a972cb5f5608248cb77471abdc", + "description": "mesa: optimize setting gl_Light state parameters", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cc4afb21010993a2d3fcac8a17c9bc3e49cf32bb", + "description": "mesa: demystify material_attrib()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fc31d9b733d0f8f6ab1983ccba584d28002fc2ba", + "description": "mesa: allow multi-slot program parameters", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "71504008c73ceea54824e6bbc0ddabf74e4f0c1b", + "description": "mesa: fix printing state parameters", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2770a4bc63231adb822350efd55a46435e140b3a", + "description": "mesa: remove redundant _math_matrix_analyse calls in fetch_state", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2ca6c99604f78249e273bd01b95e3ee5462f585e", + "description": "mesa: rework matrix statevar enums to remove excessive branching in fetch_state", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3175b63a0dfa290430f9f7eb651387788933a02b", + "description": "mesa: don't allocate matrices with malloc", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9c84ca574d43b47932f32fc7cb67a21cec7ffe8c", + "description": "util: add a common ALIGN16 macro for m_matrix and u_threaded_context", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"14a2117fc81178b81ad93ee5ff41b9ab907f689c", + "description": "mesa: replace _mesa_problem with unreachable in fetch_state", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4035a19320661ef28a6c1c335a429d3e5211ce95", + "description": "mesa: don't read from destination memory when computing state parameter values", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bde7fa0cb32aad2e16434077748f74b5d593195a", + "description": "mesa: skip redundant uniform updates for glUniformHandle", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b32e20e6301f2c1bfb25a939bddfeaa41d43c8c9", + "description": "mesa: skip redundant uniform updates for glUniformMatrix", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "736f1f70ab8a7995041a707d28d0e1a5f2940f4c", + "description": "mesa: skip redundant uniform updates for glUniform", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d0c66c167d3e0cac04861edbd939525002a8dbae", + "description": "mesa: move sampler condition for flushing into mesa_flush_vertices_for_uniforms", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0a2117bc9e01bc821a6f766c516665d2aa5b47d8", + "description": "mesa: call FLUSH_VERTICES before changing sampler uniforms", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "9545139ce5f10180547568a3da94af9e291253fd" + }, + { + "sha": "9281dfca3fff6cc725ebf02e6cd517cb473e3f1d", + "description": "mesa: don't print GL errors in release builds if MESA_DEBUG=silent", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eda37fb2698f3f0d51ea6dd16b80b7b8ba032a89", + "description": "mesa: properly disallow param list reallocation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "158351007e078a1e2068dee1e3913f9ff973cf49", + "description": "mesa: track ParameterValues size separately", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "25e242155b9810e356e5aca7a8a0370840da56b9", + "description": "mesa: don't duplicate allocation code in _mesa_new_parameter_list_sized", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5d14562da86f1f3ee1a747183d8bac183ce75fd9", + "description": "radeonsi/gfx10: fix overflow and primitive queries", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fd4016f978292e1576e28086eeab44eb1225ab25", + "description": "v3dv: ignore filter in TFU blit path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eb75a67bd61e244d7c94b4741a3c53b8ee2a3fad", + "description": "v3dv: add a helper to choose a compatible TFU format", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "329afedd8b44e85e9932da209951d235d60af7a3", + "description": "v3dv: handle Z mirroring in the TFU blit path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "29f62061d95eabaa925918a493045f91b887a55a", + "description": "v3dv: expand the formats that can be handled in the TFU blit path", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cc62a0dfe4c7eeac18a4a8fc37b07803088c08ef", + "description": "aco: Avoid extra bitfield padding", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "44f05d6b742b27fd063c7789ea4ee7f9d68f8d4b", + "description": "aco: Stub sections that don't have _WIN32 support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2c3fd34103c75df28568c6ee888021b17fc011be", + "description": "aco: Fix warnings for bools in bitwise logic", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8bc2a419fcbc89e7c7460f0eadebac104332caa7", + "description": "aco: Initialize union within Operand for MSVC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a2c981f8d0536793e20225521581f47c9e2889df", + "description": "aco: Use u_memstream instead of POSIX memstream", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8259cfaa659ed780dc79e033c678bc2a315e5ad4", + "description": "aco: Replace indexed array initialization", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "76ad75dbb86e6ea6442c5c301855f0f3eeac485c", + "description": "aco: Const correct aco_compiler_statistics", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9779a9a51e5d21044a2edbe15b876c5880d5804b", + "description": "aco: Declare num_reduce_ops for array size", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"93094b8c5e83bcecd37519cbe9f67cc6259faf48", + "description": "aco: Remove nonstandard parentheses", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d1f742e497a4720e8c909995759fcc93c1308a0b", + "description": "aco: Add missing C++ includes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e352ebf88e82f99b554ce37bb7e6b2004949f188", + "description": "aco: Fix warnings about unsafe integer/bool mix", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fcd53bebe680f4171ee4b08669543da0c1243e76", + "description": "aco: Define NOMINMAX in Meson build file", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2d12991e016bbb948c01a86e7628440987c244c2", + "description": "aco: use FALLTHROUGH macro", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ce56902f857e392b98c34bf1e1323586f4649847", + "description": "radv: use FALLTHROUGH macro", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6fb4babfe98e222e648eb2ab210136b7540bf039", + "description": "radv/llvm,aco: always split typed vertex buffer loads on GFX6 and GFX10+", + "nominated": false, + "nomination_type": 0, + "resolution": 2, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf7c4108fb62e52dd9018b6670ed933fddf7758f", + "description": "zink: allow the backend to optimise shaders.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e2e383e36a08e3d536b22c4825e0486499739e97", + "description": "glx: update fallthrough comments", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ff89a0afc6e8b9ceefc0d39c6cab2432b21e0dac", + "description": "tgsi: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "842088667f44ec77ba72803542a5aee0056cc68f", + "description": "egl: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "148a24f30514638d149a24a8fe084dc21730edb2", + "description": "amd/ac: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4d89b988bae108d37673e87ef5a691fd36884fad", + "description": "nir/ntt: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fecbc1e541fccb59bc7004508c2b2a8353524805", + "description": "gallivm: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "04eb94f93ba136e1266ae89e57bc6f6ae29365b3", + "description": "llvmpipe: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d1532cd83f9edc8382c41bd6b9f9a1523d522f8b", + "description": "radeon: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c7986febf276c2439fc476af09c301f37e7dbc67", + "description": "gallium: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cd432d99811d5198d7a164c25d4f2a4b06d9aaf6", + 
"description": "softpipe: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c27563d64a947ab00c1a9cd0f9cc1df3e0836511", + "description": "gallium/util: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7d9147438d8b6c7ec087507298438a59d324644a", + "description": "vbo: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7b222cd52d056df6c569519ea5de2f68d6ed8472", + "description": "gallium/winsys: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ead225bb6fc3d533515b8d5ae2e304086cb3257f", + "description": "radeonsi: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4442f8eda3cf24f38ce2cb204c614a562079aee5", + "description": "compiler/spirv: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a2aa777b6853ffbe23c5a5a7ae0503a17c158e9b", + "description": "src/mesa: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9b3e6014a99c935dabe957299457be0e2de47e39", + "description": "xxhash: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2d6fa03f74a7aa5224447f6bae67592dfcc1f52e", + "description": "gallium: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "805b6b426e9c1536a0d705ae3b0fdb7fd3dec04a", + "description": "nir: update fallthrough comments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5e7c00aacb98cfd45082489bee05a456637c5349", + "description": "util: add a FALLTROUGH macro", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a5b899c7da4dc628debf11ff10e27000f62aed34", + "description": "spirv: add support for KHR_fragment_shading_rate", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "244514addd990dc211c8352e03dc13bbd21e6bab", + "description": "compiler/nir: introduce a new helper to get varying name", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1c9488e0d1bb500a19440a98c2debfbf42b4ff1f", + "description": "nir: wire shading rate variables", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f86668f487b32c185388a39e2200c17c298b877a", + "description": "vulkan/util: Consolidate typed_memcpy", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "116b6d135dad7f45d018a2d715a614b35143f385", + "description": "util: Add os_localtime", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "531843cf2e939b764822ef56ba8e034ad417a812", + "description": "nir/algebraic: Avoid creating new fp64 ops when using softfp64", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "688dda5e1dd867607fbf837f6a5ebfdeeb8dd66e", + "description": 
"mesa: Use os_get_option() for MESA_*_OVERRIDE", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eeecc21d935c6c8fc783d9f805b5fa153b455df5", + "description": "util: Add property_get() fallback for android", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "73c6899285da9b93d5d140b44bca8a1a79afacad", + "description": "intel/fs: DISCARD_JUMP does not have side-effects", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "75209d5bd1f6e93cd52568d87d3ee84f516eec56" + }, + { + "sha": "b70847a3894cff36c6a4bd1cc96d174e5920397e", + "description": "nir: Use the right argument order for load_scratch_base_ptr", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "c9bcad25737418b2f2b754f5c893ad7feb1c59f4" + }, + { + "sha": "b14679ab226bd3e263a778ea8e39828bde38b403", + "description": "v3dv: check return value of drmGetMagic", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "124ea8debf95f3d6343975cb432e2e3d19db3eec", + "description": "v3dv: expand format coverage in TFU path for buffer to image copies", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b3f212c52e278ced13a795fab8c05f2f65ce7c0a", + "description": "v3dv: fix base layer for 3D blits in the TFU path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ece8dbe68f01972fd744a003fbe67fad62963c08", + "description": "v3dv: add a TFU path for image copies", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"f9e53c6e5142302cef60bab3202538f6142ad759", + "description": "v3dv: add a format parameter to emit_tfu_job", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d60a3ccf52be669f6f2973048ee53994125a6ace", + "description": "v3dv: support compressed formats with TFU unit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "14e7361c4a7258b7d38e36777418c58a71d19bb2", + "description": "v3dv: remove obsolete disabled code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "04ea3d65018ede109ba9a94f61bea3076eabf969", + "description": "radv: disable WGP_MODE for NGG on GFX10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf36cfced183bac5bb5e2f8e564674071493bdad", + "description": "radv: only mask 1 CU for GS/VS waves on GFX10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7729ea3d677e5f2c39013d112a9a395113229acf", + "description": "radv: only disable CU2 & CU3 when NGG is enabled", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "750591f4cbc1ffb2b99ad8c82126b3208fcb30c1", + "description": "radv: enable NGG on GFX10.3 APUs by default", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b9ee0cd69a04a99d7fadd831c1c255b1523a2731", + "description": "ci: Make test-docs job depend on sanity job", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "94f4497a7909ef390b84b070de68923bb4487d43", + "description": "ci: Squash \"check 
mr/commits\" jobs into a single sanity job", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "988dfc944dceefffeb80fd577ef28c75c25e6169", + "description": "ci: Move sanity stage to the beginning of the pipeline", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8749fc4261467ff8c7eb25880516649ac794a20d", + "description": "ci: Use ci-fairy docker image instead of local git_archive one", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cfe45f3bd50cf3bc9af0ac756648423e9e3b03e0", + "description": "ci: Update to current ci-templates", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cc80415a41b1a096f97ec092e615666fbe94c6f5", + "description": "ci: Drop ci-templates-sha anchor", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4e074a6d5272bd99536cd12a1ff6afdc6ece2890", + "description": "vulkan: Portable wsi_common_get_current_time()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "142d7b0f360d003d6c3914b198fb37e3e6b4a6cc", + "description": "vulkan: Replace pthread mutex with mtx_t", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "440952f152aaaadc3083cece34e5782c6c278781", + "description": "vulkan: Remove GCC pragmas by fixing warnings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d1768b23b5086abd8d6f5bd297300ce4829d3beb", + "description": "radv: save and dump vertex descriptors during GPU hang detection", + "nominated": 
false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9f3e7a6502f01a6f38121c1512dcb55a66e63361", + "description": "radv: fix using bitfields for debug/perftest options", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3a90641b0d642589c31963200e256953a96ac325", + "description": "mesa/st_vdpau: set surface winsys handle modifier", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "c786150dfa514c35b0c384cc5184282d58c1f8bb" + }, + { + "sha": "b3504f41733a55c7680fbd2f6240df2c0e9f8c3d", + "description": "amd/common: Check with_tests before adding test", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "40ede410358c85cba6b8990d56846014b0e712b6", + "description": "r600/sfn: remove leftover debug message", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "093135067345488e6506045b8b92cf18b44e5893", + "description": "r600/sfn: remove unused file", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "166657a736c54497da6c9278549c7ccd6c3b917a", + "description": "r600/sfn: Fix a few warnings in release builds", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "261f42f0c8790edfe40a3da65715fb29164335d8", + "description": "r600/sfn: fix definition of priority queue", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "b8fdcffc4c755241d645c62385729ee14dd8620b" + }, + { + "sha": "feaecbeeabbdc01498bc78cf65c3352a3d56480e", + "description": "r600/sfn: correct error signalling in switch default case", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "de353c1fbe2fe21809f824049fa5e415abae7628", + "description": "amd: Fix declaration mismatch", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a212a8a0f0f52ca7fa52e96b44825791221ef55d", + "description": "amd: Fix signature mismatch", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7c521e9968e4df3e702d6bd20f384afa3bc47111", + "description": "amd: Work around MSVC limit for string literals", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "740490787093ba3ce223485059052045b2ad958d", + "description": "amd: Replace vasprintf with vfprintf", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3ad8ac38c760732db87ff9809a049b0324804ff3", + "description": "amd: Stub sections that don't have _WIN32 support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ee72cd0757d77ade1e942c778c363249c72df82e", + "description": "amd: Remove bitfield sizes from enum values", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "31b4fdc00870e8c7c433ca30a05c32a0fe5528ce", + "description": "amd: Cast to int for %d snprintf argument", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bb1adece5e3e9ac417ea5ba50bda8ca6b1a35dc0", + "description": "amd: Simplify ac_addrlib_create", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"62a4a7787568b49b67ff17d9feb00a2c698e29a0", + "description": "mesa/st: choose S/D format depending on gl_format passed for readpixels", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4e623726114922da0a5f788d4bc70e7962a6176d", + "description": "v3dv/pipeline: avoid unused warning on release build", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4ba86ca9c367b6a7a6511a3d99dafb374db4921a", + "description": "docs: grammar fixes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aed8d30b507568b7fc0f32afca012f8def5aca16", + "description": "radv: Deal with unused attachments in mip flush", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "4cce4d22a72bf84459ee95223cc7d1c6542617fb" + }, + { + "sha": "160a0f255194953b9c6cd3d305a4432645c0b60b", + "description": "freedreno/a6xx: add support for ARB_shader_stencil_export", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b589df98622c811b397e6579358b2a51cff8a5ac", + "description": "radv: disable SQTT support for unsupported GPUs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5cf41814cd52f7de23e527518d1d90b8d346c78a", + "description": "aco: use binding chasing helpers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e732bea536be128afec8e364dc2b30f3d15498d5", + "description": "ac/nir: use binding chasing helpers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "856a6523404d3b6358d8a96aca8af00a0157b489", + 
"description": "nir/opt_load_store_vectorize: use resource binding chasing helpers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "31c9c727d1191e6ef4f3dd5c58de5264ef56d8dd", + "description": "nir: add helpers for chasing resource bindings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5b946d6eac26d62628e37ea9d241908eef938b39", + "description": "CI: Re-enable MSVC build", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6bb5e29b1b820c0ac45abdc0f6a10ab450a3053", + "description": "CI: Build d3d12 Gallium driver and CLC framework on MSVC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "484fe1e9f260706f28ac5c06bf3753970c08c158", + "description": "CI: Update Windows build for current Meson options", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "77a474cb88e57ad9ad5d01b5bd2ea9e664097a56", + "description": "CI: Remove ludicrous Windows container build timeout", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "187c146a2e04d0db575bbeb787e34ba124de655d", + "description": "CI: Windows: Use 32 vCPUs for Mesa build", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ddc900f7384c247185ca572e572aac8bd4748d06", + "description": "CI: Add Windows libclc and SPIRV-LLVM-Translator builds", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ad90e9fee6ea2c7bf05104a589d60e9368f5427e", + "description": "microsoft/clc: Disable broken f32 
-> i64/u64 test", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3a134abab8e3c2e7f5e3a8c3b94a657c9b7cca6f", + "description": "microsoft/clc: Allow building with Clang git", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "67ba666a454a8856ca1ca7813da5c5b7308cb136", + "description": "d3d12: remove hand-written intrinsic builders", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "c9bcad25737418b2f2b754f5c893ad7feb1c59f4" + }, + { + "sha": "2846d7811e5c84dc37b0fe564547e961d62ff638", + "description": "nir: fix nir_builder.h on MSVC C++ and GCC7.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "c9bcad25737418b2f2b754f5c893ad7feb1c59f4" + }, + { + "sha": "7a77008283757d7c548572ef7a66d8d8c6052134", + "description": "nir: fix intrinsic builders on MSVC C++", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "c9bcad25737418b2f2b754f5c893ad7feb1c59f4" + }, + { + "sha": "517b6986613e1d80d547ce82f56244c11ad2a1cb", + "description": "r600/sfn: lower all IO in one pass", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "45f17702db655e721b3d85a6085a488ec98a774b", + "description": "r600/sfn: simplify IO lowering and fix TESS IO lowering", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "efcb7b70504824ce8831f191ca711045f73dcf76", + "description": "r600/sfn: lower GS IO", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "81df5408ca1fb53e4c130c0f9fb23fe4d8eb0970", + "description": "r600/sfn: drop store_deref handling 
for VS and TES", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eafa9bff805a2aa0cb956b31d57d09edc9a04dcc", + "description": "r600/sfn: Lower tess-eval IO", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7421f9d95bd67d29880de18b6a09a87dc2af6658", + "description": "r600/sfn: lower VS output IO", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b8fdcffc4c755241d645c62385729ee14dd8620b", + "description": "r600/sfn: Fix vertex stage export to accomodate IO lowering", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "98bb63b2f06a61995d8d0d73aa30263716c6c418", + "description": "r600/sfn: remove unused FS input deref code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3b6c17e2965621a46eb07ba2605d9f9e221a400b", + "description": "r600/sfn: lower IO for FS inputs and handle interpolation accordingly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a45e651b11df0700333d4a13eef102341f06c3ac", + "description": "r600/sfn: Add simplified constructors for FS shader inputs.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e9babb853d1d627175833050d42a381702510108", + "description": "r600/sfn: Add lowering pass to convert load_interpolated to load for POS", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c41d0d0c3da1cd869b9efe205f8508e95cbcb7b1", + "description": "r600/sfn: fix component loading from fixed buffer ID", + 
"nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "18e97817148eb376274a8749c03b45c2f817c139" + }, + { + "sha": "d4f662a25227a3d469a516ba0acb6dc4d1a71bcb", + "description": "panfrost: Update the resource layout when doing a tile -> linear conversion", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "00360cd5c85e0b5c52821f2ba6d1bd6964215efd" + }, + { + "sha": "46518327aead81c2e08381ddf4b9dd6312441312", + "description": "v3dv: extend the list of formats supported by the TFU unit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ca44b3edd7fa3d507170fffab2e922e104f97d75", + "description": "v3dv: batch buffer to image copies with the texel buffer path if possible", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2809e2e816069ae267fb429d9c0fafd75db9e58c", + "description": "v3dv: fix leak in the buffer to image copy via texel buffer", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "ba69c36ada5609ef2a43bb34ce9b48786c083aa9" + }, + { + "sha": "aaed7a29bef6637c712c459f84ec6ec7911f1300", + "description": "radeonsi: implement GS fast launch for indexed triangle strips", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4cce4d22a72bf84459ee95223cc7d1c6542617fb", + "description": "radv: Fix a hang on CB change by adding flushes.", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "85647152535203aec5a5e77872e3ca501136244a", + "description": "docs: document new zink-flag", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": 
"feb9462bb19096ecfcc11b42d5474c6309594cd9" + }, + { + "sha": "313db572b7ac6c4798ad1d15be3eefe8167bab87", + "description": "nir: make intrinsic order in nir_print consistent", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fb0385b57cc1a7c752f83275b419f010b1337ea3", + "description": "radv: use intrinsic builders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9b040737d309698df9d1e94b24205ae133048621", + "description": "nir: use intrinsic builders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ddb81f5da32c0c81e8b9572827f8ae31133d1a7d", + "description": "glsl_to_nir: use intrinsic builders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eafc7eee5799139538398389592800589182b1e3", + "description": "spirv: use intrinsic builders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c9bcad25737418b2f2b754f5c893ad7feb1c59f4", + "description": "nir: add generated intrinsic builders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b3c65f97ad15832c1d01406216f59522e678b652", + "description": "nir: move nir_load_system_value() to nir_builder.h", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f8ee599dfb1fa862d065e01f31d79a9006b74be4", + "description": "nir: remove useless nir_builder_opcodes.h include", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a2b9e3f715f991ae155957276cec535b2701c4fd", + "description": "nir: add 
destination bit-size information to more intrinsics", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3ec217a849896a469ceb8afc5a3caa811f0df214", + "description": "nir: add bit_size_src for when the destination bit size matches a source", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "898d7c1f493b8d06168c2256404d448888183bd9", + "description": "nir: use a single canonical list of intrinsic indices", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6407b9b7d311de0669375dc005cc4cfc42ffe1f", + "description": "nir: fix sampler_lod_parameters_pan indices", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "deaebc82a73ba3f34c4f2ecf97cedb7fc9b0b154" + }, + { + "sha": "a569ffeb83daaa3f3c179e0d315a5079334835f8", + "description": "freedreno/a6xx: Fix typo in height alignment calculation in a6xx layout", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "e49748521ec9182e8d2eec823182cc463709123f" + }, + { + "sha": "f7b2165b19c29f8494ac3eb5a40ff21c8d7e1fe8", + "description": "freedreno/a6xx: add support for dual-source blending", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "efba865c4c9e9a922cefabcd86fe01004f6c3ada", + "description": "nir/lower_non_uniform: improve code with the same texture, sampler indices", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5d55ca9c300a6e240cf9216bba203321b7b44c90", + "description": "anv: fix descriptor pool leak in VMA object", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": 
"0a6d2593b8b63d2429e79eed900848c5c9a522c9" + }, + { + "sha": "df940760f9d454e90d743c65cba39cea65ee4a4c", + "description": "aco/optimizer: Propagate scc_needed label through p_wqm.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8bd3fefb74bb731d509f971cc89c687a0fd99feb", + "description": "aco/optimizer: Only set scc_needed when it is actually needed.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "8a32f57fff56b3b94f1b5589feba38016f39427c" + }, + { + "sha": "a941618a1fc361ad6089753b39dc3bcea71d8fe9", + "description": "glsl: avoid an out-of-bound access while setting up a location for variable", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "47c358233d45747def3b796c37ae92df86860141" + }, + { + "sha": "df143fa1ef6e4d97a8a3d50326a5a51ac30b4d49", + "description": "glsl: Drop mingw -O1 workaround for GCC>=7.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf2bcfe4457e727caf48db8c2e5e70e561395eb3", + "description": "scons: Fix build with llvm-12.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "954739fd18edd5c6ce2bb1028266235237834536", + "description": "radv/winsys: fix the sysmem submission path for GFX6", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "cba6ec309abb30964bdb7c96491b0bdee9615333" + }, + { + "sha": "05799844df50d949591e06cd18c293963427acd4", + "description": "radv: Const aco_compiler_statistic_info usage", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bb9b7d0a6877efb833ce5f34a8675bef23ebd801", + "description": "radv: fix missing initialization of the 
predication value", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "62d9ca696e08e173ecbb0735602a2139931b0c7f" + }, + { + "sha": "d1ff00cb7684b58b33f3b21448b8a3e704958a62", + "description": "v3dv: remove non-conformant warning", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4adb643a0add642f093163010d63e94ff149e178", + "description": "docs/features: update list of v3dv supported features", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e5ff2a357ea31cdec7e773762f3be85c29c36620", + "description": "ac: use bigger storage for ac_arg::arg_index / ac_shader_args::arg_count", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "6f130342658b85774c2b8125ff415aab66463153" + }, + { + "sha": "0b3bd7c5167c7d7a87ded47730641a4103996591", + "description": "radeonsi/gfx10: flush gfx cs on ngg -> legacy transition", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "33a78948283c9911a06650bd2ba5467678249cfd", + "description": "util,radv: Cross-platform monotonic condition variable", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4a94527ca18cbbd288e0e808c46b3e2dcb1d42ba", + "description": "util/os_time: Safe os_time_get_nano for Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "192d721e3ba30d01f5501fce31ee14ea16f557c6", + "description": "c11/threads: Remove Windows XP support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e317103753935afa1d1e1486392fc45e3b9e1fa0", + "description": 
"c11/threads: Remove Win32 null checks", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1afb8e23c70dae79e91e955c1238a4579897a364", + "description": "c11/threads: Fix Win32 timed functions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f09456361c4c753f686b8f155f5c8de3f1feebb9", + "description": "frontends/va/postproc: Convert destination when deinterlacing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "49465babdb35d88ed8a283e925d6cd346255d50c", + "description": "frontends/va/postproc: Use the actual image height when blitting", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ff9ea469f6b319cb07ee1ebbb687fa19c9d48e1c", + "description": "intel/disasm: Don't rely on FALLTHROUGHTs to print unsupported SFID", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ae27d7faae490ab7680fb059071d667d7261dca6", + "description": "panfrost: Fix stride calculation for Z32_S8X24/X32_S8X24 formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "00360cd5c85e0b5c52821f2ba6d1bd6964215efd", + "description": "panfrost: Calculate the row stride at resource creation time", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "690232c90fab42e934e297c097c6f84dd3c3d762", + "description": "panfrost: Fix panfrost_needs_explicit_stride() for block-based formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"543ad77e6fbd53420f302c40c3de8ad15bae32c7", + "description": "panfrost: Expose panfrost_block_dim()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4d0cd48fab57cbc54d9fbcb829dec09faec91d3a", + "description": "panfrost: Enable MSAA on bifrost when deqp debug option is set", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fc93eb72813917c26ffaa50ce425997c2c08f01e", + "description": "panfrost: Unconditionally align strides on 64 bytes for linear resources", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "402cabf1ef4db655239768a5159ac3178dbadf10", + "description": "panfrost: Set the layer stride", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "176ce2b9528a6625575e54bedc517a4634c7673d", + "description": "panfrost: Add two helpers to calculate the surface pointer and strides", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "84c4b232ea0d64e63454591922d68bb7d58513e3", + "description": "panfrost: Clarify bit 2:28 meaning in the Midgard texture descriptor", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "74165b3a46f66e51a82da47cf269b371f0b86b9d", + "description": "panfrost: Add a minus(1) modifier to the Levels field", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a612c05b57f41dda4ed957bd1fca356abd75bc19", + "description": "panfrost: Increase blit shader BO size on Bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + 
"sha": "0f643bbcc8793eaed9098c8b50de87e80c302596", + "description": "pan/bi: LOD is a 8.8 fixed point", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3949e8ba19f70f6e3bbe5453c0f4d8f3f579aca8", + "description": "pan/bi: Always emit a LOD/CUBE word for FETCH instructions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8040b797a821af9b91e1dd6014e596e6f8d9e1a2", + "description": "pan/bi: Only update LOD mode on TEX operations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "864f4e2b15ebd80856b5d10db79ca28f6451d686", + "description": "panfrost: Set sample_count when packing bifrost texture descriptors", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9e3d2a7d9be29cc2d949d65198533ae9b4e3d398", + "description": "panfrost: Set depth for 3D textures on Bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "05880bc28510dcd829fae293c605823f07077af2", + "description": "panfrost: Fix decoding of texture payloads", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2ba5a710b0615ccf289b263450f4c40eca003a6c", + "description": "panfrost: Get rid of the Sample Count enum", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "de0b88223b0bdd8bac2d77f419499e1b6a6c41a2", + "description": "panfrost: Stop forcing depth to nr_samples", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bb69d3bf4ef480f841cc08a7132366ceacda8284", + 
"description": "panfrost: Fix RAW8/16/32 component replication", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "76a428d03b557b5b0467417825f8f565bd1058e0", + "description": "panfrost: Account for sample count in tib offsets", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f0a767f0e41f21713771663a49d2ada0bee9eb2d", + "description": "nir: Fix LOD source type for txf_ms instructions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4eac4422175164643d4e2ab8cf0f859fb6d02cb0", + "description": "aco/ngg: fix division-by-zero in assertion", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "37a2c9ace6478b1137c9402347331eb23aacf056", + "description": "aco: fix GS with no outputs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fdfa96561e64299a713162088da26753716df295", + "description": "radv/llvm,aco/ngg: fix large shift exponent in ngg_gs_vertex_lds_addr", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d299c2eb0bdaf96f3c43d351688933c67cadd0cb", + "description": "pan/bi: Implement shader-db stats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "45de02bdac83552a2b18de9f4b6ad5a44922f0da", + "description": "pan/bi: Ensure TEXC src0 is not marked SSA", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bbd3a8eaddc12b64fbd61eff954983771f07be24", + "description": "pan/bi: Emit a combine even if we only pass one staging reg to 
TEXC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b9121624fc6d59ceef5685dfdd54331b68d52005", + "description": "pan/bi: Fix off-by-one in RA", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "087e3f8da804f8a333a96fbfe9948b5833ef9819", + "description": "pan/bi: Fix varying writemask handling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eee2a4346154570d6130a05e14afde59bad17a1f", + "description": "pan/bi: Implement sampler1D", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8b23fbb9543672c0351a25d5d204fa9cc731a113", + "description": "pan/mdg: Fix shader-db counter", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "028de3beaf9d44330cbaab24519e8f98659aa570", + "description": "panfrost: Enable indirect uniform indexing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "33a764554204596db67940a5672a7690f56f3a4b", + "description": "panfrost: Add PAN_GPU_ID debug option", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2a1238f3a36f3a40cc709b1a76c7f4e920d4d528", + "description": "nir/unsigned_upper_bound: decrement num_sources_left before recursing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "65fbae16e37b5f349a0d0feb8d54ba132a1f02f4", + "description": "nir/unsigned_upper_bound: fix buffer overflow in search_phi_bcsel", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + 
"because_sha": "72ac3f60261a8510512861b93e843e695331e2ab" + }, + { + "sha": "cf0b54cdc16c9e7197954259e87ae194d65fb0ec", + "description": "aco: fix v_mul_hi_u32_u24 format", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "57c152af9ce2e34c1a8724574c6a2c5d19c0c845" + }, + { + "sha": "ecebc263f5b03550e27df0d1068340b61621fa93", + "description": "Revert \"util: Add helpers for various one-time-init patters\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "bda4d6e0d01116db59a0a03b0c703a7af6e11949" + }, + { + "sha": "86662655ece52ae56afddcb8c9b5ded43719757e", + "description": "Revert \"nir: Use get_once() helper for one-time init's\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "c9062df1d57df19a56288c1749d4b6d22d7c1418" + }, + { + "sha": "5461e212456e7c35ef489e0fc2a4fafddfa79c06", + "description": "Revert \"freedreno/ir3: Use get_once() for one-time init\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "b4ad27a986e1c6899cbf23355c3e9c6de345a323" + }, + { + "sha": "b8fb95ce01cb131f4991f807364ce61fa542e5ab", + "description": "Revert \"gallium/hud: Use do_once for one-time init\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "2e81ec5e009e3fbeef3fe1a76f2dfee428b7c160" + }, + { + "sha": "f20192f7c33cf34e7dd1239d18000e16d5ccf94e", + "description": "Revert \"mesa/st: Use do_once for one-time init\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "bcb2981e145f0c2a54a637e20b6c55eaf316c04f" + }, + { + "sha": "410b65185931a20acd3d7c2371bd62d9c8a7002c", + "description": "Revert \"util: Fix helgrind complaint about one-time init\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": 
"f8c7a43f33d4647c16c4892d56706a14e5d6bf17" + }, + { + "sha": "8fffcbce9bcf393ee0e629aa66eaea4184afef1a", + "description": "Revert \"mesa: Fix helgrind complaint about one-time init\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "f7102ac376a23a394786085a2b0dffa94c13150c" + }, + { + "sha": "1f6e03c3bf51b2ba8cd7f22c069a962beada7f13", + "description": "Revert \"gallium/trace: Fix helgrind complaint about one-time init\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "1aa055539f32fcb3c78d908d1635bb3a3d517bc2" + }, + { + "sha": "f57f040f13cdf8ba7bafc46ce398d12e2a7ef63c", + "description": "Revert \"tgsi: Fix helgrind complaint about one-time init\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "d91fe7d1c65179e64a6ca294135ac1bad2b16fb9" + }, + { + "sha": "8da98beb5d62117192060e23393034a38c9910fa", + "description": "radv: always use 32-bit predication on compute queues", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "62d9ca696e08e173ecbb0735602a2139931b0c7f", + "description": "radv: use 32-bit predication for conditional rendering on GFX10.3+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b1558ec3ec9002c0d79b177d7b77d5a0070297bc", + "description": "ac: add gpu_info::has_32bit_predication", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "472e81ed805a1f8f9e6d1b2ca39c089c674c33df", + "description": "v3dv: Wayland WSI support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9fa1cdfe7ffd9e7ebd83055e2008f3e4b8ada549", + "description": "intel/rt: Implement push constants as 
global memory reads", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f7e24e559fb632eae54e444b022db9da35922258", + "description": "intel/rt: Add support for hit attributes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "96fde5518b5c47550f0f42f3a0504ccbdc4a4e92", + "description": "intel/rt: Add a helper to create the raygen trampoline shader", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "303378e1dd4119e04e0f6cb623766115a00c2eea", + "description": "intel/rt: Add lowering for combined intersection/any-hit shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cb261b03e5af7862f1321c778e3ad54b640226bc", + "description": "intel/rt: Add lowering for ray-walk intrinsics in any-hit shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c3ddefa000ec09631ac0115fd959e152edca27d8", + "description": "intel/rt: Add support for shader buffer record memory", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9ba7d459a3a193b002586ddbee2720948002421c", + "description": "intel/rt: Implement the new ray-tracing system values", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7ce7c93755fd62ea91e384255e85f6689b6d890b", + "description": "intel/rt: Implement traceRay()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "75209d5bd1f6e93cd52568d87d3ee84f516eec56", + "description": "intel/fs: Add and implement intel-specific ray-tracing 
intrinsics", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1f6ae809ef000a3f0ed20148fd5e8c98d43b62d1", + "description": "intel/rt: Implement support for shader call payloads", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "72354b0e9dd338f28e0422be81ccb31cf241a36c", + "description": "intel/rt: Add a helper to create a trivial return shader", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fad81a3968144f4765a13e70082fdc163650b1db", + "description": "intel/rt: Add a pass to lower shader call instructions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ca88cd8e5a6d1c36e0f3842eee20edeb1d78eec2", + "description": "intel/rt: Add return instructions at the end of ray-tracing shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "49778a7253dfce7a09c6d0e433c571a543d85065", + "description": "intel/rt: Add support for scratch in ray-tracing shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2b3f6cdc6cf1b2a217af447196e53469a1c9a8d6", + "description": "intel/rt: Add lowering functions for each ray-tracing stage", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c7660918d77851d35170e0064d22d2281fed5ac4", + "description": "intel/rt: Add a pass to lower the new ray-tracing intrinsics", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6e50db4edac110f59b2381cbb27cb568a4a4b8ea", + "description": "intel/rt: Add builder 
helpers for accessing RT data structures", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1f6e70c85a88bb18f48a42574b77b4c0a27d8dbf", + "description": "intel/fs: Add and implement a load_global_const_block intrinsic", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6d5b57aeb7d381337e7bfda418205240330f3f04", + "description": "intel/rt: Add a brw_rt.h header with #defines for basic RT data structures", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7280b0911d8ce22092251a8db6d37b11bdc5a59e", + "description": "intel/compiler: Add support for bindless shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "27f44116fe0bfde34149a676ddb5e75d14fffe7b", + "description": "intel/debug: Add a debug flag for ray-tracing shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a7dd172aaa11fcada1513082ff0abea031090b1e", + "description": "nir/lower_io: Support shader_call_data in vars_to_explicit_types", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "477d2f03794154eccde9e33573d326d11f1c64d0", + "description": "nir/lower_io: Allow ray_hit_attrib in lower_vars_to_explicit_types", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bbbb0600f64347a1b1c6d9ba6d4d5863071d7fb4", + "description": "nir: Add a helper to get the live set at a cursor", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c7eccb9ca2a3371aa6bbe912c1088cfa30f548f6", 
+ "description": "intel/genxml: Add BVH data structures", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "980950c52100f8f54c68d0ec0cd0023480673c95", + "description": "intel/genxml: Add RT_DISPATCH_GLOBALS and RT_*_SBT_HANDLE structs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "858d565eea4d9e1152277550ee88f7acf4cb71fc", + "description": "intel/genxml: Support truncated addresses", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ba9c714cacd21b6f5ee1c58d219c52af23a80a27", + "description": "intel/genxml/pack: Stash the cloned address field", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9d16d973a70aed61f38607229339a8c4d3c2ad6d", + "description": "intel/genxml: Add the BINDLESS_SHADER_RECORD data structure", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5faf859ebbeed2a529913d064aae76055cce586b", + "description": "intel/dev: Add a gen_device_info::has_ray_tracing bit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1d6434f02ba7ae17664506645f43365b2753ffc4", + "description": "spirv: Emit nir_jump_halt after TerminateRay or IgnoreIntersection", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "630e54a08b7efb4a92e7708fc5cd682767a1a502", + "description": "nir: Add a halt instruction type", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a1281f8a995bcbe6144b43f27bb0c89c28f2a3ba", + "description": "meson: add 
idep_mesautil to components using simple_mtx.h", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "53f7d539cd93aff35f64a113a7f22fd91f1eb841" + }, + { + "sha": "37a706ad5d99857da187d2e253e8927e7fe23145", + "description": "Revert \"zink: initial implementation of shader keys\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "2be2a500a396fe1dc0e121816e4056874cdd43fc" + }, + { + "sha": "06cb6c6063b989c9f225822ff8616590d2167814", + "description": "Revert \"zink: refcount the shader cache\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "b9fdc21bba0724271520462f3f04ba72ae106a26" + }, + { + "sha": "01f04c7760aa27f7edc138631eb434124d3b6b17", + "description": "Revert \"zink: move shader key structs into their own header\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "ed7a5a55689ddcf10aae3f671e390252aad2d62f" + }, + { + "sha": "a4d03aa2b16a0cd6a94855784bbf8f32ed7bd595", + "description": "Revert \"zink: fill in params for fs shader keys and flag shader for rebuild\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "9aa08221fa6653002dec2a07641d58d3c7fa227a" + }, + { + "sha": "1a25b1b36e791e5c13757d1cd8d07b72e49b1ca4", + "description": "Revert \"zink: put those shader keys to work fixing up fragment shaders\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "eeff625ab31f0a1bf94739e09ca6b08de00c94f5" + }, + { + "sha": "469c59f8eade8fa72dfe0260abaccced69d2aa8d", + "description": "Revert \"zink: update shader modules in gfx program when flagged dirty\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "e96afeeb7bb2306aa0fba76e27163d4c7a86860b" + }, + { + "sha": 
"025cb90042ba62db8f4be6b612c8f5d2b6b3746b", + "description": "radv: Fix RB+ blending for VK_FORMAT_E5B9G9R9_UFLOAT_PACK32.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "e893102bcf5d6632f36399d745481639344e5526" + }, + { + "sha": "0742edba5ef4aa2253436a449a797daa8f2779d5", + "description": "mesa: Synchronize get_gl_override()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d91fe7d1c65179e64a6ca294135ac1bad2b16fb9", + "description": "tgsi: Fix helgrind complaint about one-time init", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1aa055539f32fcb3c78d908d1635bb3a3d517bc2", + "description": "gallium/trace: Fix helgrind complaint about one-time init", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f7102ac376a23a394786085a2b0dffa94c13150c", + "description": "mesa: Fix helgrind complaint about one-time init", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f8c7a43f33d4647c16c4892d56706a14e5d6bf17", + "description": "util: Fix helgrind complaint about one-time init", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bcb2981e145f0c2a54a637e20b6c55eaf316c04f", + "description": "mesa/st: Use do_once for one-time init", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2e81ec5e009e3fbeef3fe1a76f2dfee428b7c160", + "description": "gallium/hud: Use do_once for one-time init", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"b4ad27a986e1c6899cbf23355c3e9c6de345a323", + "description": "freedreno/ir3: Use get_once() for one-time init", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c9062df1d57df19a56288c1749d4b6d22d7c1418", + "description": "nir: Use get_once() helper for one-time init's", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bda4d6e0d01116db59a0a03b0c703a7af6e11949", + "description": "util: Add helpers for various one-time-init patters", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "53f7d539cd93aff35f64a113a7f22fd91f1eb841", + "description": "util: Add helgrind support for simple_mtx", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7f223a2329a7d392b8860f488b3797012cdd37fe", + "description": "spirv: Implement SpvOpConvertUToAccelerationStructureKHR", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "879b87cf8cc59087e81b51ea26738c013c6cfa95", + "description": "spirv: Implement OpTerminateRayKHR and OpIgnoreIntersectionKHR", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "66685679b7c047398d3f593db86a24eba38db3b9", + "description": "spirv: Call repair SSA for OpTerminateInvocation", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "886d2d1a9abcb0572a957c24ae44de4d6c055bc0" + }, + { + "sha": "6885cc6f654e66eb53d5e3b44b3e071c9e0621a9", + "description": "spirv: Implement OpTraceRayKHR and OpExecuteCallableKHR", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + 
"sha": "946d5ca004c33435a648ae115b859caa607e78d6", + "description": "spirv: Update JSON and headers from Khronos main", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b15caf190b574b4e58b29eeb5b241abccbc43de1", + "description": "spirv: Rename some ray-tracing intrinsics to NV", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "22c1e0974786249066c27e15fd7ebec74e5a5a62", + "description": "vulkan: Update XML and headers to 1.2.162", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c0286fc09b72bd8e7e43bdccb30a89b023d79452", + "description": "zink: fall back to util_blitter for scaled resolves", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "19906022e22cb37493861b6976c9623618b5b769" + }, + { + "sha": "1c17223c02b68679d67a4e4a6be8b9b7a80fa2e9", + "description": "nir/opt_peephole_select: respect selection_control when collapsing ifs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "28395407eb86d17a72b970262d4089c73975ae11", + "description": "nir/opt_peephole_select: collapse nested IFs if applicable", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "000ea77938725d81588f2cc5120a09798e51f192", + "description": "v3dv: fix allocation size for BO handles", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fa5429390b2236f4cb92336b122d7a16be7a5fa7", + "description": "v3dv: remove obsolete comment", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"19da9bd10d94bbda211c6395aef34fe97daabebc", + "description": "clover: Fix typo in comment.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "105fc1c615eb4963503c738c68766f830bc5a662", + "description": "gallium: fix missing bit field in p_state.h", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "3dc6da1ac1f5be2d6421940739b88f7e8d36cdf9" + }, + { + "sha": "87c70f1984fc6f9be6c32559ecfbde1946ff7360", + "description": "lavapipe: enable pipeline stats queries", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4263162839d67f08c69895925ffc19cb4fbf4c42", + "description": "lavapipe: fixup mipmap precsion bits", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2c0a078fdb4b856ecc29d5cf9b0478afc2309aa8", + "description": "llvmpipe: fix multisample lines.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d932720ff7dd8fafbb200d9f91c3956d5f0e775e", + "description": "llvmpipe: fix multisample point rendering.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2ed54033def4e56e2ec9de7d25f7d4db30523b77", + "description": "llvmpipe/setup: move point stats collection earlier.", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f24645653863564eb059c1200a52b25f659216e5", + "description": "lavapipe: fix wsi acquire fences", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0d90c7cbc4d005fe6245d0a19985784940792403", + "description": "lavapipe: fixup device allocate + enable 
private data", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2ac396e2e5b966a247b698dcc44456fff1ffa0df", + "description": "zink: fix layered resolves", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "19906022e22cb37493861b6976c9623618b5b769" + }, + { + "sha": "989877365d236e553e740dd9b77dbf524d8d4a98", + "description": "release-calender: Update 20.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f60fabc38f72c1a201698ca263b4fadb5b9df1dc", + "description": "docs: update calendar and link releases notes for 20.2.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9c2e8a8f90a09a0ec2d0a4bb9592e57311e17a1e", + "description": "docs: Add relnotes for 20.2.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ad2b1200875bc3f99c5302117ebc58f624a3e14e", + "description": "docs: add release notes for 20.2.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8e961b91c320125c81fbae0d8f9f6076ee58aa3c", + "description": "aco: optimize v_add+v_lshlrev to v_mad_u32_u24 on GFX6-8", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d9e4504b0dc6752f1820deb01eb829b4f2af3a45", + "description": "aco: optimize v_add+s_lshl to v_mad_u32_u24 on GFX6-8", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eaef1f21278ff8ddd534946c562b211b7208c155", + "description": "aco: allow to use the range analysis UB in emit_{sop2,vop2}_instruction()", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "be600b009a8c4508a488f71b9eaf5e6901dff245", + "description": "aco: add a new Operand flag to indicate that is 24-bit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "05fd780012894c3b6527df6d434c4cb3c82d7772", + "description": "aco/tests: extend the optimize.add_lshl tests to GFX8", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cd59c22325293d17ed76bc803d927407ae0a7abe", + "description": "ac,radv: use better export formats for 8-bit when RB+ isn't allowed", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "684531fd37ca3aa15dfb34f34ada10615ff66864", + "description": "radv: add new vk_format_is_*() helpers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a5227465c13ae74651a932a82aeae65683f4a063", + "description": "meson: use a feature option for microsoft-clc", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "ff05da7f8dc4aa531704d48f718514e3b1fff45d" + }, + { + "sha": "7ca4a478ad307e3e706e86b2786929a6a3b7dff8", + "description": "meson: Don't add extra values to shader-cache", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "5de56937a3d009659076dc67de6a57379fc7a31b" + }, + { + "sha": "a92f597b98bb032b904c7c8a8c3a9fe798b51915", + "description": "freedreno/ir3: Fix valgrind complaint about streamout state", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9de6a601ce61fd3466aed0054b1759fa3fcdf162", + "description": "freedreno/drm: Quiet timedout error 
msg", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "156d7e45f745a1567fc3b69a4f6faa3c54eec49e" + }, + { + "sha": "98d182fd46c90934f85700f9d2b35190ea4c848c", + "description": "freedreno/a6xx: Clear control mem at context create", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "150a914a786281e20a97fc03d7aaf4d6454bd618", + "description": "freedreno: Convert one last mtx_t -> simple_mtx_t", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8651cfbbf05516fb901a766ca562a5101c835f7d", + "description": "freedreno: emit_marker() cleanup", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b039e03f55517287d73b485802a8537e3639141b", + "description": "mesa: add an environment variable to default enable INTEL_blackhole", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f5610d99492ab981f3bcf2f8a3baaae882616f1d", + "description": "st: trigger noop if the default value is not true", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "76ade57fa6585a388fe81f8a3a9235230628a727", + "description": "ir3/ra: Fix array reg liveness in scalar pass", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "88b5a2b80bc60f0d7416275835a264c7ce19b656", + "description": "nir: fix gathering cross invocation info", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "5b77b14448c39d664460fd00d2a356d47858880f" + }, + { + "sha": "79bd8edd87e8d1f72b0d80918463319653a91a27", + "description": "swr: Pass draw start information 
to state update mechanism", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c83cc49f6bcf6cb6fd3a061fc10b6139b77afb81", + "description": "ci: fix name of the Sienna Cichlid expected failures file", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ce5c23eb009b1453347e87099a4dfc72d4c8c793", + "description": "v3dv/cmd_buffer: missing (uint8_t *) casting when calling memcmp", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "e1c8041cde64ff72dab504ee0e31c2b5224145d6" + }, + { + "sha": "14ec91b1319254c5ee608fa0d6c0110439c29d7b", + "description": "radv: dump BO ranges into bo_ranges.log instead of stderr", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4ffa6acb0decca0a8eb41b41e985b2f92bf8d1ff", + "description": "radv: add RADV_DEBUG=noumr to disable UMR logs during GPU hang detection", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a61a398f7eb4f2dd50722e152895a1e54b599e14", + "description": "radv: dump application info in the GPU hang report", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8d7f78ccf8f2079492f3b135f4a374e45942e7fc", + "description": "radv: append a time string to the hang report dump directory", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "15e1b530f6e010e639a193422eed3736fc060bf3", + "description": "radv: print more debug messages when generating a hang report", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"f7364c9fe0ad2e8bae94045b4ffef13a12f49faa", + "description": "radeonsi: don't allocate LDS for TCS inputs if it's not used", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a4ba51e5be5462fd191970243512852c90aedc7e", + "description": "radeonsi: don't insert barrier between VS/TCS if all TCS inputs come from VGPRs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "61fe66a2e433c5565153ca800e81c36a17c7cba1", + "description": "radeonsi: pass VS->TCS IO via VGPRs if VS and TCS have the same thread count", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6f130342658b85774c2b8125ff415aab66463153", + "description": "ac/llvm: prepare for passing VS->TCS IO via VGPRs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "98b2aacfbfe3cbdeabed3200b98cb02915d08b29", + "description": "radeonsi: remove unnecessary NULL checking in NIR tess functions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1190808eca6c23d18ff1c2ba7abc99601b221bb5", + "description": "radeonsi: if VS and TCS have the same number of threads, merge the conditonals", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0aba1743611109545bba86bf055a04ecbe210132", + "description": "radeonsi: always return void from si_build_wrapper_function", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a56e92c79e35a44590e20a5b9032b0a29db80c73", + "description": "radeonsi: merge TCS and TCS epilog conditional blocks", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c605de30ebcfe1eb883c011dcb7db69e38c015c6", + "description": "radeonsi: don't generate a dead conditional in si_write_tess_factors on gfx9+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5df5ee2722f44782d8bb6562d0e11ffff813ed46", + "description": "radeonsi: limit HS LDS usage per workgroup to 16K to allow at least 2 WGs/CU", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bdee9dc633fd3aed4c27bf09c285dfd788c803ad", + "description": "radeonsi: don't allocate LDS for TCS outputs if they are not read", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "10beddf6595499f17c6c23ec0409160fe1e952df", + "description": "radeonsi: don't leave more than 8 unoccupied lanes in HS", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9b5b5cbc53430d39993db5fb3bcd9f99ed006f2c", + "description": "radeonsi: adjust tess SGPRs to allow fully occupied 3 HS waves of triangles", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "96593847445bd4f80ef65040dda5f74c26c7a80a", + "description": "ac/nir: fix a typo in ac_are_tessfactors_def_in_all_invocs", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "2832bc972bfb512b314a3180a1258be984807760" + }, + { + "sha": "bb6bdeb2305cda83fe58617b628b78ff968b2d00", + "description": "r600/sfn: Correctly lower all int64", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "861d48ea0b1831dcfe95c0c3ad3afaa5808e0408", + "description": "nv50/ir: 
Initialize Program members in constructor.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8976918efa2cb5d08fc762b3aea125f18aa0ebd5", + "description": "r600/sfn: use a per stream index register in GS", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "335c48ab330a5e3d7cc2bdd905eba44a3a80e982", + "description": "r600/sfn: lower bool to int32 only after common optimizations", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "f79b7fcf7c7f5db626efdb63f27e8bc64d0aed77" + }, + { + "sha": "5b77b14448c39d664460fd00d2a356d47858880f", + "description": "nir: Use src_is_invocation_id in get_deref_info.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e78c089aae5789d2ad2698beb9f13c12736f7443", + "description": "nir: fix gathering patch IO usage with lowered IO", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "17af07024dfc8302b37a270cea4ef3eae06fe5e2" + }, + { + "sha": "aadfdb962ffa425a230d972c28351c8879072b6b", + "description": "nir: fix gathering TCS cross invocation access with lowered IO", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "abe9588ff048656c94d2074ca3d852ac371e0c7b" + }, + { + "sha": "9edfbd629611109d56d11943f92a4e4bcfebf3ab", + "description": "mesa: lock Shared->TexMutex only once for a glthread batch", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8e7270de551e25acc75b2acb496320cf312c96ab", + "description": "mesa: lock Shared->BufferObjects only once for a glthread batch", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "b8684672ff39d49bc2db3564b8c77784606a5d9d", + "description": "glthread: make glGetActiveUniform return without syncing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5f820b38d41a3bea956fd79ec6930fea860b50d9", + "description": "mesa: make error handling for glGetActiveUniform glthread-safe", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ac4dc6e139b8ed84622ba0fcdcea4ec15e60b586", + "description": "mesa: add glInternalSetError for glthread", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "347a3d68cd8ed31fb50a7509f1526a2930d0cd50", + "description": "radv: Dump BO VA ranges on hang.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ba9906fc8f27aaeff1c90f3e2b125173bc3684c4", + "description": "clover: Initialize command_queue member _props.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "e42a7fa03786603b5069afb762006623d71ae7e7" + }, + { + "sha": "f8dc22bf61c1e6008f6954ffd25c1ee322f500c6", + "description": "meson: drop deprecated EGL platform build options", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4da8323a14b58303b41826382f1951131171da0f", + "description": "gitlab-ci: drop deprecated platforms that snuck in when nobody was watching", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "c56f09124b195c5cbaabdd7eadbb0523bede3abb" + }, + { + "sha": "14186a1b845b60f45fa3ad34a072dce1a6cbe67e", + "description": "aco/tests: add Builder::v_mul_imm() tests", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aab507c6b051be99551373b36be19d14a9cbeab0", + "description": "aco: use v_mul_imm() for some nir_op_imul", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "02c5519e6c6bca75f9cd5603fa6e8a6e51eff4e3", + "description": "aco: try harder to not create v_mul_lo_u32", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8ca23bcf39623b5ca0d4a1d2c41b4509d1c737f9", + "description": "aco: copy constant to sgpr in Builder::v_mul_imm()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "756bb29391bc368d38472b825510320ddae6042e", + "description": "aco: create vgpr constant copies using v_bfrev_b32", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4d93fc25f0667805c8e8a95febf5adc4052f9282", + "description": "aco: count v_mul_lo_u32 as 16 cycles", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "70d665d981bfef6735fb7178e4c481d467e1176c", + "description": "aco: don't create v_mov_b32 in v_mul_imm()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "35613c752f5826dcc5bbfbfb8e6ab0ab3127b7d3", + "description": "radeon/vce: Bitrate not updated when changing framerate", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a59b1b18a95af1f8edb0093baf508e974e3251a2", + "description": "glx, egl: Add LIBGL_DRI2_DISABLE environment variable", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { 
+ "sha": "27612984a4d414d118d97ab17a9b2f66235d0884", + "description": "gallium/vl: Set modifier field for winsys handle.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "c786150dfa514c35b0c384cc5184282d58c1f8bb" + }, + { + "sha": "8c3abcab6d889c17c19ed28ba8401b16d6599891", + "description": "nir/copy_prop_vars: avoid a duplicate lookup if src == vec_src", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "33eeb994be136ac4046b75a2088aa40a8f005c70", + "description": "nir/copy_prop_vars: use nir_deref_and_path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "abc3225927453786de240b4493507318e8ecfa4f", + "description": "nir/deref: add helpers to lazily create paths", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7d8c06d484fdb9514d49ead04637bb554e25b01d", + "description": "nir/search: check for changes before adding uses to worklist", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "556a20afe6a1222897f81f6d87ee0d003aa03539", + "description": "nir/search: check instr type before adding to worklist", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4832262560d3156cf47be14c52128ae2da4c7371", + "description": "nir/loop_analyze: initialize loop variables on demand", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "be7b20135e50a15081de54fb24c731503df660a5", + "description": "nir/copy_prop_vars,nir/dead_write_vars: ignore read-only loads", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "276e241b579d8935c2963d7ac2c237f7f9e9a4b6", + "description": "nir: add nir_var_vec_indexable_modes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fb01882ddbbf9eedc646259f24917f06766465b4", + "description": "nir: add nir_phi_get_src_from_block() helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8d477baa4f917f502758268ade6590fef7a89d4e", + "description": "nir: allow for cheap intrinsics in nir_opt_peephole_select()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5cc29fccbeb761f124a71fc3d0c27d55a51596e9", + "description": "radv/winsys: Fix use of nonexisting struct type in sizeof", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9eda7f475cfebe79cefdb2ab19daeaa29030c704", + "description": "radv/query: Avoid hardcoding array size constants", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b4d6131c156103ebf0ed683a71c51337c7d4834e", + "description": "radv,aco: Compile with -Wshadow when available", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cba6ec309abb30964bdb7c96491b0bdee9615333", + "description": "radv: Fix -Wshadow warnings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a978602d1f2faa11502ca63f75d1c528c90e8ddd", + "description": "aco/tests: Fix -Wunused warnings in release mode", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"5231c788ff0da6f66426e2d7fd3d0c799094f66b", + "description": "aco/tests: Fix -Wshadow warnings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2bb88743203f9347b347b9db6dd593df060db819", + "description": "aco: Fix -Wshadow warnings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bc7f442d8e03919cf7f8e6a470b3c0406e11a1f9", + "description": "radv: ignore other blend targets if dual-source blending is enabled", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c08144b8ba0502a41ba571d7ead54d8cd5446cdf", + "description": "docs: add basic docs for d3d12 driver", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0678fdd983dfbc3f82671a66f6dbc7607f955e99", + "description": "ci: add list of expected failures for Sienna Cichlid", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "94c00be49fba0bbfe39836844c622c02dbe142a2", + "description": "v3dv: only write new uniforms when needed", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "070dccdad934aa29a12e852b47a3288d29a8de9e", + "description": "v3dv: remove redundant free of default pipeline attributes BO", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "54e8138bbc8f694eaa47bfb289251ae7c7e12210", + "description": "r600/sb: Initialize sb_context members in constructor.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bbd21c403826d9b7f0499690783ecbbf1b2fad01", + "description": 
"r600/sfn: Initialize ShaderFromNir members in constructor.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c1b675bdd5e9217eaa6258deaa5b30087cb0327c", + "description": "r600/sfn: Fix typos.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "50fef61fa54899856fc66516b22a82d327fe8ed4", + "description": "intel/fs: Add support for printing half-float immediate values", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "91f7e262e1cdf6d8e5cbc8c3cd4370ad354e14ae", + "description": "intel/fs: Silence unused parameter warning in filter_simd", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9f0907a4f591449de0dd7cfa4b196ce65d6aa156", + "description": "intel/compiler: Delete redundant MAC declaration", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6edc2a3d40406f5b0a5edd5bcccd3232ad50a61f", + "description": "intel/compiler: Rotate instructions ROR and ROL cannot have source modifiers", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "1e92e83856b1ef8a827a63359d59ed7883ed4994" + }, + { + "sha": "366de8acc1763de04c052fe9d0cec34c138dd2c2", + "description": "docs: Update Mesa GL enum allocations for EGL_MESA_platform_xcb", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c41cd7c32454a94547f7a0e8da44282e3cef4813", + "description": "radv/winsys: add missing Van Gogh and Dimgrey Cavefish in the null winsys", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"0c8cee39f45b82ea18a7575174aabf7c7f85e4da", + "description": "radv/winsys: fill real PCIID for Sienna Cichlid and Navy Flounder", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bac6cc586fe4c1b24351e0574d3a961eb631f6ae", + "description": "ir3: Enable nir_lower_vars_to_scratch on a6xx", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ea3db9f5960c50da34730cb08023947810b47aaa", + "description": "freedreno/a6xx: Implement private memory", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4d44461dd51c2c5ce60cc8b99c8863e2567609c9", + "description": "tu: Support private memory", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b525934f26a52ba81a5b76ff1085c341c2a0a066", + "description": "freedreno: Add per-device parameters for private memory", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ae109ca83c4872364d3195600ba002512c8d9385", + "description": "ir3: Properly validate cat6 half-ness", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4970aa55771e41ca0eb6dd8a1d707bb846c0d694", + "description": "ir3: Initial support for private memory", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "32cb01a4189894f9a5e68386b40193f3224ae5bf", + "description": "ir3/parser: Fix st{l,lw,g,p} and ld{l,lw,g,p} assembly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "504142ff754e521aa88a6e95c63683a71323878a", + "description": "ir3: Fix STP/LDP 
assembly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e7471ce77612b746b01a4926a7d04b0921044ae0", + "description": "ir3: Support assembling & disassembling getspid/getwid", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2cee8642ca3a3cbf2eea6c490a329837a34a2ffc", + "description": "ir3: Add more a6xx-specific cat6 opcodes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c82d7be193474f590ab6d9381a39deb8943d787b", + "description": "ir3: Expand cat6 a6xx opcode field", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "92fe6fa0ccff47953ce3c2cbc550323d6ccac1b2", + "description": "freedreno/a6xx: Document private memory registers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3d5bed03e13859b6aa066f3b7dcb9c699726c9d4", + "description": "freedreno/ci: Strip location from asserts", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "df29d0a111da3a555d8f0751722ca70fe0c5ffc0", + "description": "glcpp: Handle bison-3.6 error message changes", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2be8cebd0b062aa91a6198ba8ae99cc8749b2e8d", + "description": "amdgpu_bo: make cache_entry a extensible array", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "111a1b2e1c073b34917d4720dca11fb1ba7e6b46", + "description": "winsys/amdgpu: make RADEON_ALL_BOS a debug only feature", + "nominated": false, + "nomination_type": null, + "resolution": 4, 
+ "master_sha": null, + "because_sha": null + }, + { + "sha": "631e18d4275dc46cf47c969e85d8ec2d3d0262be", + "description": "aco: create v_mad_u32_u24", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1200f6da0b64b127a631cd33ab60ac9cbe6bee31", + "description": "turnip: implement VK_KHR_depth_stencil_resolve support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "13b2beb41597a8c89fa1f74639c143d95931ed56", + "description": "ac: Don't negate strstr return values in ac_query_gpu_info", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "b635dff25620b948080d31211ca487a71203af4d" + }, + { + "sha": "01e3f43007952c352496b2724704bfc4af1319e1", + "description": "v3dv: remove box check from texel buffer copy fragment shader", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "745f0b8a31c82adcc5093a5dfd7ede9a87305b63", + "description": "winsys/amdgpu: move amdgpu_winsys_bo::lock for better packing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bccb9a745745b2d7fbf89a900ecfeff4a391ee21", + "description": "winsys/amdgpu: replace amdgpu_winsys_bo::initial_domain with pb_buffer::placement", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9c239aa6386f58356bf3364977f078579799c43e", + "description": "winsys/amdgpu: replace amdgpu_winsys_bo::flags with pb_buffer::usage", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "37cdce0146dbc89806a93555a00fc00718b0533a", + "description": "winsys/amdgpu: remove amdgpu_winsys_bo::sparse", + "nominated": 
false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a09bc2db1898488f76cf03fd65bd5d920a21ea1c", + "description": "winsys/amdgpu: remove amdgpu_winsys_bo::u::sparse::flags", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "31290f98061acc237ba0f5d9c8c4c38ad6075c70", + "description": "intel/fs: Fix sampler message headers on Gen11+ when using scratch", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "97ebb896afb6801d1fcd69556583975411379998", + "description": "intel/compiler: Do interpolateAtOffset coordinate scaling in NIR", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0d3b61dd7e8ce80f5ec0cbe27e901747e9e0e61d", + "description": "iris: Stop quantizing the depth clear value", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "afa25cc49de8c99ea3034d511b948069794b3f5d", + "description": "ci: Bring freedreno into the \"warnings clean release build\" fold.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b537d425b39052d9069643d92b465790c1e113ab", + "description": "freedreno: Suppress uninit var warnings from shader stage switch.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8ae38885d61b0efb58e1549d41c9577bdadee62e", + "description": "freedreno: Fix uninitialized var warning in afuc using unreachable().", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "239bae7b942dc900b3768e42b96580dc4f85068a", + "description": "freedreno: Fix warning about 
uninit size for the size==0 special case.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "536ec9d7f5ab3bba904090abf7b3fc58e92eec78" + }, + { + "sha": "530a32bb9593704cec291b613e75b91ecdfd6aa4", + "description": "freedreno/a6xx: Fix use of uninitialized img->level in the SSBO/image path.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3390870cec4e3e3ea90f62f8d46cefd80417c6b9", + "description": "freedreno: Fix release build warnings for asserted temp vars.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d1b2d15cb9904423c28c5718a51bc175bcf24b1b", + "description": "ci: Enable -Werror in more clover builds.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "17ebce2f1308a8b95c0eb0a761032fa3e0961310", + "description": "d3d12: transition the right planes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "968e360e4706d4d84311f7bfdfdfd04a3af0f92d", + "description": "zink: track custom border color samplers and verify against device limits", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4d498f5eaca76e6748dfef68bc8b78fb92c71087", + "description": "zink: handle arbitrary border colors using VK_EXT_custom_border_color", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f493fc5fa2b1acb620f7d2fb052273502edf8e92", + "description": "spirv: workaround setjmp/longjmp crash on MinGW", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"9e4f588318f543a8ba485159efa71cd23016d57b", + "description": "llvmpipe: fix arith-test build on msvc", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "03cfc93ab5ee8a0c6f34f893e86baa3a40582e85", + "description": "winsys/d3d12: Use MakeWindowAssociation to remove DXGI's alt+enter handling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "81adf127016a999dcd9b0344f53b74bdba4f09e4", + "description": "d3d12: also reject GDI-supporting pixel-formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8bbc31aba2f3e048c335422e041267ff47a87a90", + "description": "d3d12: Release swapchain buffers before resizing them", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "be4475c4a73dc71c8868f5508064feb6bdec0a22", + "description": "d3d12: Implement winsys framebuffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e74b3e570a6a4d42577912e9d37ca60771df2c9d", + "description": "wgl: Wrap stw_pixelformat.h into extern C", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "584061bdcdbd1d450af68c00f4ab9516584d66cf", + "description": "wgl: Create third buffer when drawing to front buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ece2cc3352f52858587d13092b4300b0d6447760", + "description": "wgl: Wait for fence when not using winsys framebuffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f49ff1548be1306ece10cb3399fa55227025cf49", 
+ "description": "wgl: Call flush_resource() before presenting", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0e79ee3741dca626cf614503261c11dc083d19da", + "description": "wgl: Flush in-between resolving buffer and presenting", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d60913e39259c867b9dbff93efbd9005b00b33ad", + "description": "wgl: Use winsys framebuffer interface if present", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "23bfe0356748c6d3e4bb9db490910c0f29c70197", + "description": "wgl: Add winsys framebuffer object", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "acf8af458f50600bbbbffaa46f782fec6ffe2df6", + "description": "wgl: Add PFD flags based on stw_winsys callback response", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c28eb3c6aac99decd7563b4ef987c08aaf9c7a6d", + "description": "wgl: Add stw_winsys callback to check which PFD flags should be added", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bb9cc7ede4daf1d74b2e8140008e2f1fe03b96bd", + "description": "d3d12: Add D3D12 WGL winsys", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3b034e97716f0862151e5a52610edfef46ad0b16", + "description": "wgl: Implement get_adapter_luid callback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6d2d3cb580ca24312669f01cc27354ad6c26c7a", + "description": "wgl: Marshal HDC into screen creation and LUID 
querying", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "494bd9cfa66e96938b8f888c9397a601d6db47ec", + "description": "d3d12: Pipe adapter LUID from callbacks to D3D12 screen init", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "29996b88c2bae10a61cc2196e49b2fffd597d929", + "description": "d3d12: Add glon12 target which only includes d3d12 driver", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fd361dde6557fd209834b822dfc02dabef7e9c57", + "description": "d3d12: Support WINSYS_HANDLE_TYPE_D3D12_RES", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7145cd6e7357a48aa4b124714593e49e8c8744e2", + "description": "mesa/st: Introduce WINSYS_HANDLE_TYPE_D3D12_RES", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5c305c8e36a08b235ffc348f35c0b28a28395b22", + "description": "v3dv: use VkSurface to retrieve an authenticated display fd", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0fcd379184d658285f3313c5c4026253e0ec6930", + "description": "aco: fix combining max(-min(a, b), c) if a or b uses the neg modifier", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ff22fcf4500e74fa518f5667b442931fee5ba85a", + "description": "radeonsi: fix scan_instruction for bindless inc_wrap/dec_wrap atomics", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "25fff591c1162a9c1158b0e69de3a6edf89f42d4" + }, + { + "sha": "d0657ee837955a7d9f8e1a9a5f0202b6d6dd2970", + 
"description": "radeonsi: fix NGG streamout regression", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "81d106d6ecab326c96fb9f8db8e7f39621c9816e" + }, + { + "sha": "be905b74f7425935b3ca1be26fd8bf04833c73de", + "description": "radeonsi: don't add num_vbos_in_user_sgprs to the shader cache key for non-VS", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4753235406ab207b0f60010358861109b4840c78", + "description": "radeonsi: don't do VGT_FLUSH before fast launch on gfx10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e2b784e74bb16f9202cd04fa5bca6aa11002a1a2", + "description": "ac: rename num_sh_per_se -> num_sa_per_se", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "603b5340b9e53d3488f404a66a80fe2ca0395930", + "description": "ac: rename num_render_backends -> max_render_backends", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f2977a162af46ff0b9d9334bb677b768900ba5d3", + "description": "ac: fix min/max_good_num_cu_per_sa on gfx10.3 with disabled SEs", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "9538b9a68ed9aa0f8a231d6bf681f6f0a2a9d341" + }, + { + "sha": "b635dff25620b948080d31211ca487a71203af4d", + "description": "ac: fix detection of Pro graphics", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "bfb92875992599d9c5ca5ecf39fce36a1719272d" + }, + { + "sha": "d425d765bfe837df66c85a02998063e91a0b97f8", + "description": "ac: add build_alloca with an initializer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": 
null + }, + { + "sha": "025bc9e50e2489f9a312b92435409f58f4cd35f7", + "description": "radeonsi: add options.inline_uniforms to the shader cache key", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "b7501184b90a20015885b3f2276a7a5ceaef31a7" + }, + { + "sha": "a73bdf4853494aab39413b960ef6e240e4800eb8", + "description": "radeonsi: remove VS input loads when culling with rasterizer discard", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0ab7ab40c8765a2d10df510ce17d43a87eec8f06", + "description": "radeonsi: tweak triangle list culling performance for GS fast launch", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2b2b22f496d073847bf94f4db9ba47275444be58", + "description": "radeonsi: read vs_state_bits in vs_prolog correctly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ea90d8a74498148a1abbde3d554fc3ce3d37cb67", + "description": "radeonsi: don't subtract max_verts_per_prim from hw_max_esverts on gfx10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e29e41a3cd84702a7ea6874f314fca897ca76983", + "description": "radeonsi: determine correctly if switching from normal launch to fast launch", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "3da91b3327fb93d0364c0ca9d0216f695160831d" + }, + { + "sha": "8d2876a34314e7378eff67a5f320eba56683dba3", + "description": "radeonsi: only do VGT_FLUSH for fast launch if previous draw was normal launch", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "3da91b3327fb93d0364c0ca9d0216f695160831d" + }, + { + "sha": 
"c4ebdf9ee7eac7241b5185d7d496e96384a41ecb", + "description": "radeonsi: do VGT_FLUSH when switching NGG -> legacy on Sienna Cichlid", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "9538b9a68ed9aa0f8a231d6bf681f6f0a2a9d341" + }, + { + "sha": "74ea26f61367983640330cd572d66d30cf5ba3a8", + "description": "radeonsi: fix min_direct_count value", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "0ce68852c1a8d37e837d64f97a2a2ef5d6b669a5" + }, + { + "sha": "802fabdaa4be7fd1847ec35920663064689a253c", + "description": "Revert \"anv/image: Define anv_image_get_aux_addr (v3)\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "87dc3106b077199b829a082e32ec33d0c6d400ab" + }, + { + "sha": "783b895ec9fe15af6214de28e262e20f18ad4cf1", + "description": "nir: Rewrite lower_undef_to_zero", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ff05da7f8dc4aa531704d48f718514e3b1fff45d", + "description": "microsoft: Add CLC frontend and kernel/compute support to DXIL converter", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1885e356e6a4d425471c0bfa0aee5fcf98d19186", + "description": "spirv: Allow spirv_to_nir callers to provide a float execution mode", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a0aaba26cd4860a51c63ede351e02e4a7046c2c7", + "description": "nir_load_libclc: Mark libclc shader as internal", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cab995b4630cb6143a3172b44efda06ad2fa6051", + "description": "nir: Make nir_build_deref_offset() support ptr_as_array", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "949f8572ec3959be0ac21aa8857515452c180500", + "description": "vtn/opencl: Fix alignment for half vload/vstore", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9c4dce1d9660a92ad62829284c24ac6292cf774c", + "description": "nir: Add nir_alu_type -> glsl_base_type conversion helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7d3aec9905e7c2702cefb9d18fa948ee85be11fa", + "description": "compiler/spirv: Handle the LocalSizeHint execution modes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a42c846d246dc58f4f81bcfed026ea93e951c519", + "description": "nir: Fix nextafter() for hardware that don't support denorms", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b94b827add3bd840b407f190099deff7c8c7e26f", + "description": "panfrost/util: Move nir_undef_to_zero into core nir and add 'lower'", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c730ace12b51d46585fac6710fbe57dccd67071c", + "description": "nir,clover: Drop nir_lower_mem_constant_vars", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2e8e275075fa6301094e73c1ceda68585f3ae64a", + "description": "nir: Add a more generic helper for gathering constant initializers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f727e98d22f8a897b2b5ea5240c3e10c34534389", + "description": "nir/lower_io: Add data OOB asserts to write_constant", + "nominated": 
false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "791fecfae126a784be4f59d7b6a148bcd79419a5", + "description": "microsoft/compiler: Add scope for declaration in case statement.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "b9c61379ab4c5065d624fb9403c1df9d5589b313" + }, + { + "sha": "f4c240f34d4d2cc73e8914ac9696ad0f1f456064", + "description": "microsoft/compiler: Add struct glsl_type forward declaration.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "b9c61379ab4c5065d624fb9403c1df9d5589b313" + }, + { + "sha": "6a4407d2699831ab4ce0f281723f8612c0aa33b7", + "description": "microsoft/compiler: Add struct dxil_features forward declaration.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "b9c61379ab4c5065d624fb9403c1df9d5589b313" + }, + { + "sha": "edc63ad44c73e55a67f3ea86746513e035ddd347", + "description": "nv50/ir: Initialize GCRA members in constructor.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "602d4a78bc58b915c14eae5af184cb6b6126d446", + "description": "radeonsi: handle pipe_draw_info::increment_draw_id", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c4310f70aa2707c034d420388eccf75a6ff39298", + "description": "radeonsi: swap DrawId and StartInstance SGPR locations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f14a05d618df2d72a80169d82f1bb1db43e2face", + "description": "radeonsi: don't load DrawID for indirect draws if it's unused", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"cbb8090bea3d0cd2fc88129115e3e9c8f52f418f", + "description": "mesa: don't FLUSH_VERTICES from primitive restart changes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "51c7c64f0a43c337e6816d2dece44d26c48f8cb8", + "description": "mesa: add primitive restart state to Driver.Draw parameters", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e8c0c80ecd713b1eff30fca423a9c31747669650", + "description": "mesa: index _RestartIndex with index_size_shift", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "52c20a6eea3b0c2ac4085be2e7732211aa3d7536", + "description": "mesa: move primitive restart enablement determination from st/mesa to main", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6dd8b6518e1a5a71313440f05e9507b8f0f041ef", + "description": "mesa: remove constant drawID parameter from _mesa_draw_arrays", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7a8880f871035f521f8319df8c5ebfdded6f5a02", + "description": "mesa: clean up GLboolean types in draw.c", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4656df5a65f2cd74a26097e4419f8bafd5beb0ab", + "description": "mesa: clean up Driver.Draw parameter types", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d6df35b304d272cad58e666a82ec3c1fa31b85a2", + "description": "gallium/u_threaded: add support for multi draws", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"52ad436740670d24040ee70bb2e155a47f789f42", + "description": "gallium/u_threaded: store start/count in min/max_index for better packing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1cd455b17b7ac9260fb1e943c5993e8cf747abb1", + "description": "gallium: extend draw_vbo to support multi draws", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "80b391077f66eff22a544be679d4b918691026ae", + "description": "gallium: add missing bits of the direct multi draw interface", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f038fb60b479b8e48f88a243a66067f62f3d3cab", + "description": "gallium/u_threaded: improve draw merging by clearing pipe_draw_info fields", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "72ff53098c648ea5513fc2b150349b5b9ae58667", + "description": "gallium: add pipe_draw_info::index_bounds_valid", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "920bbfb3a007fe01155eddbd539bbd7bed95289b", + "description": "gallium/u_threaded: clean up direct vs indirect draws", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d5b37dd3c262443c8b842c4aa43d0d30117e21f7", + "description": "gallium/u_threaded: lift DIV_ROUND_UP to eliminate it for constant expressions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "abe8ef862fe5649e16ceed76f60a2a364949b7b4", + "description": "gallium: make pipe_draw_indirect_info * a draw_vbo parameter", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "1a717dca04657e1f5a621afbe1ae391e3da067d5", + "description": "gallium: move count_from_stream_output into pipe_draw_indirect_info", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "238ee7b801cf3f861871d7b7849c25e180da5894", + "description": "mesa: add Driver.DrawTransformFeedback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c77409a87e46a526cd2256439188deaf0ee2d4e2", + "description": "turnip: minor tu_queue fixes related to vk_base_object", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "008872aa309c015684653e498a8cc17a11f15fc5", + "description": "turnip: Assert about the storage buffer offset alignment.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fdfe4a4d307377f078a01d05a247c4f61d208be8", + "description": "libgl-gdi: add zink support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eebcd6a5021bf36bf7b0622c5d3ee1e5ce19d797", + "description": "zink: fix unused variable warning", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3d1260aa919e3b87c589321ab4e4f4ee594787e3", + "description": "v3dv: implement VK_EXT_private_data", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "30b6fbc496badc12e460022e32c6f21754120aef", + "description": "v3dv: use the common base object type and struct", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"c3432ad852449ec31580a0b77af785e37eaa48f9", + "description": "radeonsi: add an option to enable 2x2 coarse shading for non-GUI elements", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c13370e8169b665e42dbb4177887125caf724666", + "description": "amd: add register enums for VRS", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f054a261a86a57365100dab9e88969f4f35c1947", + "description": "ci: update some radv trace checksums", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9b9d1db90e1147a254277fd222d93f5ba43f9a30", + "description": "ci: update some radv trace checksums", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5195a30d27ade4aabd3aa40b2dd9ac30e34a9e3c", + "description": "util: fix -Wshift-count-overflow warning", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "526405dfec6073b395f0f25ac13e84bd2a0fa80e", + "description": "meson: Treat LLVM headers as a system dependency", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "381824d14fcda979838468c4bb095f850e8347e4", + "description": "frontends/omx/av1: enable AV1 OMX Bellagio support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f1b23d8299ab88a4a981c0ccf4add68963958fb6", + "description": "frontends/omx/av1: add AV1 tasks management", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2874f873487b2c0ec8dc4672d76b24a9ba25193e", + "description": "frontends/omx/av1: 
add AV1 OBU header parsers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6ab3030f925e183a218d49b878b1841954950a63", + "description": "frontends/omx/bellagio: add AV1 initial support to omx dec", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7ed302819437353d23df1c3a12ac34fedb73739f", + "description": "radeonsi: cap AV1 support to SIENNA CICHLID", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "33690167a49027f8a32a1ac90db1e84cc51ab6aa", + "description": "radeonsi: cap AV1 codec configuration", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d78821b3bafbac885923935b3354c8b46bba697e", + "description": "radeon/vcn: fill up the probs buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "07a8d4634d629233e4a4ef0ca973d70214094ebf", + "description": "radeon/vcn: get AV1 message buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e08f6a18461cd2c99b762b57b2c373e10eab7f87", + "description": "radeon/vcn: fill up the context buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "057293a508618999796d81a0954f9c282c2f51b4", + "description": "radeon/vcn: add AV1 context buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7e7e753d275165471a1a25c90bb11f08ab782066", + "description": "radeon/vcn: add AV1 default tables for the context", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "9a46b1086fb35043166cf38f8e47304460eb214b", + "description": "radeon/vcn: add AV1 dpb buffer size", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6230407e82b955a76a9a96c6f3cc86d82bb6d90a", + "description": "radeon/vcn: add AV1 support to the decoder", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "80f145a0a7f07a07b1a4b9ce44f401a94e34e7a4", + "description": "radeon/vcn: add AV1 codec driver firmware interfaces", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0267a567f6d5a98ffdf600b3c53823030333d56c", + "description": "vl: add AV1 codec picture support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "87dc3106b077199b829a082e32ec33d0c6d400ab", + "description": "anv/image: Define anv_image_get_aux_addr (v3)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5fa0e5b87df52e112dc1ffa0ba34962ffca16540", + "description": "anv/image: Check surface offsets after adding each surface", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9f0abc2a22422c4cdf4d9ef6a17bd288bddad674", + "description": "anv/image: Rewrite check_surfaces() [v2]", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "15642a52ce216a0043eb88447d65d33f8516efd5", + "description": "anv/image: Further split add_*_surface funcs (v2)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"9f2073c76ef0c1bb8d4c14add2ea33be2bc558c2", + "description": "anv/image: Define add_all_surfaces()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2bfd43ba0bf7ef43d73c0d4dc9174aad8f0522f7", + "description": "anv/image: Add more asserts to choose_isl_tiling_flags", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8636adf856eccb6d702666a29199fd72f47c7afb", + "description": "anv/image: Move some DRM code in anv_get_image_format_properties()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "40ac4c9cdc36067962e5e66c169bbd492f6b4a3d", + "description": "anv/image: Emit error message for non-2D DRM images", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c892e242589fb93caec8198b093b434df4e11b6b", + "description": "anv/image: Drop redundant rejection of YCbCr formats with modifiers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c9f2a74b5a5da4ccfd69ce83ba1d39e106565714", + "description": "anv/image: Respect VkImageFormatListCreateInfo for VkImageFormatProperties (v2)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ce4f6bda66e32db8cc52233932062bf7bfd256e7", + "description": "anv/image: Fail earlier in anv_get_image_format_properties", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8c5b4b1e1dbf73317bbcb4e778bbd900995d6a89", + "description": "anv/image: Minor refactor of VkImageFormatProperties::sampleCounts", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "2b3ec91326b7e9181a2c865e5e9e251466092c39", + "description": "anv/image: Rename get_wsi_format_modifier_properties_list()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "57d416d4236bac3bbae10fcf290f9502ca091a5a", + "description": "anv/image: Fix VkExternalMemoryProperties for images (v5)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a2aa56905c6b4413a3ad66b77412fd28d34daef0", + "description": "anv/image: Delete the list of modifier-compatible formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "146f41e608c4b35a5f68f0f49aa08d7949cf0d4c", + "description": "anv/image: Refactor iteration over modifiers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6835cb7f86223e33aaece0e10be3e87b118f1af5", + "description": "isl: Make public the list of modifiers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "51eefbaae6d8d70a5774627e33f2dd7f81b34c99", + "description": "anv/image: Fill drmFormatModifierTilingFeatures (v2)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1f39b3e48da34b2401a385fc4de4145610e75da2", + "description": "anv/image: Teach anv_get_image_format_features() about modifiers (v3)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "486ae7c655e86d3f1b08f9356b11fe4eaee707bb", + "description": "isl: Add isl_format_layout::uniform_channel_type", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": 
null + }, + { + "sha": "f665bae4eb34fa2d05f6e9ccdd90245badd12eaf", + "description": "anv/image: Use isl_drm_modifier_get_score()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "01bad67a940c234e325e17a37afe076b47ac9ad1", + "description": "isl: Define isl_drm_modifier_get_score() [v3]", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b50275a4b63d3da12ead2f6e57be8988223af83d", + "description": "anv/image: Fix isl_surf_usage_flags for stencil images", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "51a19c83b000407a31b5cd17b996084a6b58a4ff", + "description": "anv/image: Check DISJOINT in vkGetPhysicalDeviceImageFormatProperties2 (v2)", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "19906022e22cb37493861b6976c9623618b5b769", + "description": "zink: more accurately track supported blits", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "5743fa6e709a01c5a6820320b2e87931af46e7cf" + }, + { + "sha": "465a48a048ca96bb1f828686149e00597107edfe", + "description": "zink: always insert barriers for general-layout", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "11ebe2a572fe3a4bf4513391ddce4703d6ebd86b", + "description": "zink: mark general layout as transfer-read/write", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "249aed1ff067116d6279fe2484ccc52f0405840a", + "description": "v3dv: rename playout and dslayout fields to use underscores.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "ba2e979b5ce0e813c2a00c06514a7a1b8b1621d9", + "description": "v3dv: blit shader clean-ups", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "840ba2513a21b29aaf7e5a190a838e4111862be4", + "description": "v3dv: initialize pipeline layouts for meta operations at driver initialization", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ba69c36ada5609ef2a43bb34ce9b48786c083aa9", + "description": "v3dv: add a buffer to image copy path using a texel buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6304c08818e9e571f0e73cbc814deddd1d0c416a", + "description": "v3dv: fix width for buffer view texture state", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d25d097d3d524e1e8c5ab3f9dc91cf155fb0687a", + "description": "radv: don't subtract max_verts_per_prim from hw_max_esverts on gfx10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f777d00a756d72cc01571ca94efa8afa3784fd4e", + "description": "radv: don't count unusable vertices to the NGG LDS size", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c5e8f6700bf4c50cd273ed2f6c476c66557ccc37", + "description": "radv: fix applying the NGG minimum vertex count requirement", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0790105f2f0d1813dc2ecaf5e12e7131730a8bc1", + "description": "radv: do VGT_FLUSH when switching NGG -> legacy on Sienna Cichlid", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "68f152cb9a65ab20f03d5efb21d6f7ad3cb16e25", + "description": "mesa/gallium: add MESA_MAP_ONCE / PIPE_MAP_ONCE", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "addfe49fdde0402055b5b9d6f2846cb2f37954af", + "description": "radv: Fix radv_queue_init failure handling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aaecb65b89f304c187f223efa14e11b5b175a867", + "description": "panfrost: Don't expose fp16 support on Bifrost unless explicitly requested", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fee4e991fe9b905edeb31f3bc5cf1fef32e9e5f1", + "description": "pan/bi: Stop extracting the immediate attribute index from src0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "549a59f66e2d132e55b62017078734dbf0322a32", + "description": "pan/bi: Add a varying_index field to bi_texture", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fb01328d30eb038023a0fb5bcbc80f7382754ab2", + "description": "pan/bi: Fix LD_VAR with non-constant index", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d86973d92a9021c2bb1e0b8088cce7db3b1ae5be", + "description": "pan/bi: Stop passing special varying names through src0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1176cc12978d78025a435aa4d17bb90c1045df9d", + "description": "pan/bi: Pass LD_VAR update mode explicitly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "4321b4fc935c2ec4946d47d5e606dfa4810d7aaa", + "description": "pan/bi: Move LD_VAR packing out of bi_pack_add()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "058bcf44062f81ec6a42025ef9e0957a180ad4bb", + "description": "pan/bi: Set roundmode to RTZ for f2u operations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "00a6a9bdf8d74eebc1ffbcd57f400859bbb0d9cf", + "description": "pan/bi: Let the GPU pick the right format based on the varying descriptor", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aa2156f9493a67d8a00d168e3e11783c9bb4fa1a", + "description": "pan/bi: Support automatic register format", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d0cd8bf2a54ee5813afa72b76475bb04c269b481", + "description": "pan/bi: Support txs operations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "045ae5434330e864a5ee97c60ba7ecac5a26cc46", + "description": "pan/bi: Don't use TEXS for tex operations with a src that's not lod or coord", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5ad9f95f24991fdd667cc66a3a3950cf9edd8da0", + "description": "pan/mdg: Try demoting uniforms instead of spilling to TLS", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "69cad1f96ef4481cc2395def9c993ddcbb0e2540", + "description": "turnip: Close sync_fd only if it is a valid file descriptor.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + 
"because_sha": "cec0bc73e550859635a71d330625bccdf71e87ee" + }, + { + "sha": "71ee4e2853bc4af270969e6d91533cba7b9c0cb4", + "description": "clover/spirv: Add missing break for SpvOpExecutionMode case.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "ee5b46fcfdb4df3d28839499e5945178af9b76f7" + }, + { + "sha": "7820c8c13ff3e9367b208f09059c940a1629f708", + "description": "frontends/va: Fix *num_entrypoints check.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "5bcaa1b9e9707aea7be73b406345bb9e46f92a18" + }, + { + "sha": "1beb477908c49ae962017d61bb4ca252640c9018", + "description": "freedreno: Disable PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1f44053301339e64ef070ac8ad81a0ba3f073310", + "description": "freedreno+turnip: Upload large shader constants as a UBO.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "17db969f7a39c128bf02238a7a6f1243d78476f1", + "description": "freedreno/ir3: Fix incorrect optimization of usage of 16-bit constbuf vals.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "386998cfbf068706237e0f274be6f3b874e9d66f", + "description": "freedreno/ir3: Switch emit_const_ptrs() to take BOs instead of prscs.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a9b37e5dad6652e7cb404da6a0452dfd46533d04", + "description": "freedreno/ir3: Include at least 4 NOPs so that cffdump doesn't disasm junk.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "51f2b11b043da05e12f26d8466e68e55f0d17c3b", + 
"description": "nir: Add a size_align helper function for aligning elements to 16 bytes.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "433841d9eb7cd9dd5e00b2f47cd998e0e986ef02", + "description": "freedreno: Fix leak of shader binary on disk cache hits.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "f97acb4bb4b18f127b62aa8eeb57cdf3d8fe3aa2" + }, + { + "sha": "b3daf341d4d0f8f5f97f48b095b4ac3678b9f1f9", + "description": "intel/fs: Add assert on the brw_STAGE_prog_data downcasts", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "671c850310523bba93f1ff18739ca69a62f8c63a", + "description": "spirv/cl: add enqueued workgroup size.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2dd3fde56d45836766d17fed6e95102ba48a4314", + "description": "clover/image: handle MEM_KERNEL_READ_AND_WRITE flag.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c5a33ed8c20724cd1be0451d4f3fd9f6da047b03", + "description": "clover: add CL 3.0 event/queue queries", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a8bad2b71a25246ac756c93014d631ee4f8277a3", + "description": "clover: add 3.0 program properties", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bd804c074fd90966a869d00961fce1a5a0d7defc", + "description": "clover: add device/platform info for CL 3.0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "39940ee8d6a18f4b19d98ca7ffcba137d8b33a62", + "description": 
"clover: add cl 3.0 SVM invalid support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a144dd69172b0c4a5858f3025d1b82c9281661fe", + "description": "clover: add all CL 3.0 API with invalid functions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e42a7fa03786603b5069afb762006623d71ae7e7", + "description": "clover: add support command queue properties", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0272b6b1ba001741e8550a5d1f8f47d47db588cb", + "description": "clover: handle memory object properties properly.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6fd20a0281de0fb2e2145c1bbffe606736c7e9d2", + "description": "etnaviv: drop nir_print_shader(..) call", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b479a1f03cdcf70b86df6903ace6ff8c92f205b9", + "description": "etnaviv: fix disabling of INT filter for real", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "89a41dae7702731bee298288f3acbcbd56096b30" + }, + { + "sha": "ad6365f656b44c8daf788582ba8a91b72d9710c0", + "description": "docs/features: document d3d12 features", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "867323379e353a58e76a1340071751c43431f215", + "description": "aco: don't use SMEM for SSBO stores", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2410def98fa688aa009b42de3224e12e5ddbaff3", + "description": "mesa/main: add missing include in glformats.h", + "nominated": 
true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "1bf539b3a2129779addde397886870c00045e6d7" + }, + { + "sha": "2736f974962c293ce76dcf309ab78dfb4b198516", + "description": "aco/tests: add output modifier tests", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0c522d3aa7432b72c2ed773218d6f0eb25f58943", + "description": "aco: fix fp16 *0.5 omod", + "nominated": true, + "nomination_type": 1, + "resolution": 3, + "master_sha": null, + "because_sha": "1210e0bd6205c5f5365a29c91425dea3e49d89a7" + }, + { + "sha": "558daa73f9adb1275ddcf00515c7f79f726b7ae1", + "description": "aco: disable omod if the sign of zeros should be preserved", + "nominated": true, + "nomination_type": 1, + "resolution": 3, + "master_sha": null, + "because_sha": "df645fa369d12be4d5e0fd9e4f6d4455caf2f4c3" + }, + { + "sha": "5c8fc0b1f4ba04735a6cafe8352376ccf81bbbd4", + "description": "Revert \"ci: Temporarily disable jobs on the Collabora lab\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "902ac3d7c5ab75f2ff31bf23d1d5add7b5c377ed" + }, + { + "sha": "3f91f4e2ab6cb720c8fe7e5151794ca8defa88c6", + "description": "nir: don't consider txf_ms_mcs a query instruction", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ff3b4f6683cffece1d97dc7994e7161fd3e09d92", + "description": "util: fix unknown pragma warning on msvc", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "2ec290cd92a9796c9308e14dcc9bf01cb199f670" + }, + { + "sha": "2f5b3ac2f85dbbcc25ac3709560aa5db3ed24193", + "description": "aco: remove v_{add,sub,subrev}_u32 on GFX8", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"e7f8c195d80a0c981f63e6c527d6bd4dc83249b5", + "description": "microsoft/compiler: Fix reference to renamed intrinsic getter", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "b9c61379ab4c5065d624fb9403c1df9d5589b313" + }, + { + "sha": "2ec290cd92a9796c9308e14dcc9bf01cb199f670", + "description": "util: Fix/silence variable shadowing warnings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4e87e7863f8eee004b5b2ce0f13978f105ac0f0e", + "description": "glsl: Fix -Wshadow warning", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "902ac3d7c5ab75f2ff31bf23d1d5add7b5c377ed", + "description": "ci: Temporarily disable jobs on the Collabora lab", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "460287adcadf1c03b13fccd397fcd9d270c4d31d", + "description": "iris: initialize shared screen->vtbl only once", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "959c2d1edbf56934f6d313a48136f6f1740dc549", + "description": "egl/dri2: fix race between image create and egl_image_target_texture", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "da9fbbac42f4531f66165aa810257f6ae2661bd1", + "description": "lima: define set_clip_state implementation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f586a8efb7585475bde2783a8f412d4ad6a3e752", + "description": "gallivm: fix float atomic exchange.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"0a6f5ebe284a054d913558e48a47c973e942025c", + "description": "gallivm: lower vector compares", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3502bf47b26b0ad3905c6da6459144b8cbd5ef37", + "description": "gallivm/nir: lower dot products.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2a3fd242b06283867feab9ba7831eb76c0fe8098", + "description": "gallivm/nir: add fsum support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "53064ce6b5f277364b29e4b6f206cfcb4fc717a4", + "description": "gallivm: add float to 8/16 int", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ce07c52b825903e698b63bc608d7ddb8cb578cd6", + "description": "draw: fix tess eval pipeline statistics.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "202bc38ce9e3e52cea1448f22882d17c7e5e9a90" + }, + { + "sha": "6925c031e0e6420baf7f046e3100d6ca4c6d55e1", + "description": "ci: update the list of expected failures for RADV", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2ad0142db2483d444b5289d30d52922eefe93a8e", + "description": "ci: update the list of skipped tests for RAVEN", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a4dc4ece6303cc548fcdd29b2851ee2be6454f1c", + "description": "radv: Use internal drm_fourcc.h", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "0833dd7d124bfd94161bff15e5b4293987920fce" + }, + { + "sha": "9b6516ac249ee479b78e399e1a54a293e53e9676", + "description": "etnaviv: nir: do not 
run opt loop after nir_lower_bool_xxx(..)", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "035e21e780205bef4d922f4ae346f284397247de", + "description": "v3dv/pipeline: take into account precision for the output_type", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7da854e1864f61d821dc51ef0dba8b465d67ee4a", + "description": "v3dv: remove combined_idx support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "429c33641275e02b17351ab24a693f2469cc36f3", + "description": "broadcom/compiler: separate texture/sampler info from v3d_key", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1fe52af61a4e7bf17ea6a37159b1fe1dab850b1b", + "description": "v3dv: cleanup/remove support for pre-generated variants", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5169dfd1fdeb8dee2f621b45ca73093b88f0296d", + "description": "nir/lower_tex: clarify nir_lower_tex_options indexing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f7b122728d5e015bbd706505e60600a864b43ea7", + "description": "drm/uapi: Fix modifier field mask for AMD modifiers.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "2cc2b456889dd2f1374018beea0afe53d762ec30" + }, + { + "sha": "f0c8645cb93bfb29771606b9a6aca4aa1a06d30e", + "description": "v3dv: Remove unsigned comparison to zero.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3fe5c13d71b831b42798755a3ba983b5de6d1563", + "description": "vdpau: Add 
missing printf format specifier.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "89b9863252272a7109d468738517a0d68376a909" + }, + { + "sha": "5adba503ddf0dea1c34d11c0cf505eff3a2eb70c", + "description": "nvir/gm107: Initialize SchedDataCalculatorGM107 member score.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4b65c09d86d37efa34e93b17b0e4a03546f26af9", + "description": "freedreno/ir3: Fix crash in shader compile fail path", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "74140c2e859c15593a379cbde96b1048f2ad69dc" + }, + { + "sha": "cf9ef90066592333a9113d49328be29583308a60", + "description": "freedreno/ir3: Add pass to deal with load_uniform base offsets", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4bb5a6c30a79f9a2d95b7c61addac7caecff9219", + "description": "intel/blorp: Delete clear color conversions during copies", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8ed2a241db4d1bdebfc51b8d7b7a0e60ea8bed36", + "description": "i965: Disable color fast-clears for miptree copy", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7779b1d71bf053f0c73a1b717e6d2ed91f948378", + "description": "iris: Disable color fast-clears in iris_copy_region", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "12dec2004ed0b50b0cb0ad7cc635f2a706be9891", + "description": "panfrost: Fix stack shift calculation", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "e6152091ca99c3d8491ac48baa1f301da6f4a4c3" + }, + { + 
"sha": "24c553c9753841bcd56409d433aad1e29cc1b3ac", + "description": "panfrost: Fix out-of-bounds read on SFBD", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a5191f30bc09a009dc24bcfb0ea7ddfb2ef402c4", + "description": "panfrost: Remove panfrost_can_linear", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "826969ee0269f8e65a2ae2a436c6c6879727a645", + "description": "panfrost: Implement linear Z/S for SFBD", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0fe2d49ccddf9b35c259a5466b729c6237a0e378", + "description": "iris: Use converted depth in clear_depth_stencil", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "667813431c001db0445838a8545565e01726bcd1", + "description": "iris: Add and use convert_depth_value", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2e713313a29422b38435c91f8277c1893fcad095", + "description": "mesa: Clamp some depth values in glClearBufferfi", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1bf539b3a2129779addde397886870c00045e6d7", + "description": "mesa: Clamp some depth values in glClearBufferfv", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fda015023af5d0d2d844d5fde07a8c19e4d8f453", + "description": "mesa: Add and use _mesa_has_depth_float_channel", + "nominated": false, + "nomination_type": null, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf5cea7232f9ee2934c212211ebefb6fe766526d", + "description": "nir: allow reordering of loads from 
read-only modes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eb7507681f8d94727160b6627f6fb032a7f4305c", + "description": "nir: add nir_var_mem_ubo to nir_var_read_only_modes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0ef5f3552f68076747211ec81f687b37a5540d8e", + "description": "nir: add strength reduction pattern for imod/irem with pow2 divisor.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e96afeeb7bb2306aa0fba76e27163d4c7a86860b", + "description": "zink: update shader modules in gfx program when flagged dirty", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eeff625ab31f0a1bf94739e09ca6b08de00c94f5", + "description": "zink: put those shader keys to work fixing up fragment shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9aa08221fa6653002dec2a07641d58d3c7fa227a", + "description": "zink: fill in params for fs shader keys and flag shader for rebuild", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ed7a5a55689ddcf10aae3f671e390252aad2d62f", + "description": "zink: move shader key structs into their own header", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b9fdc21bba0724271520462f3f04ba72ae106a26", + "description": "zink: refcount the shader cache", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2be2a500a396fe1dc0e121816e4056874cdd43fc", + "description": "zink: initial implementation of 
shader keys", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "54a965b153f80cff792853f262a09cfc7987f777", + "description": "pan/bi: Add support for tex offsets", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ed057ca3d7b37b9571eabcf6aa0466f2c81bb493", + "description": "pan/bi: Allow lane selections on component 4 and above", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "38bebba18ac1f59f65591a96bd97b916ee8cd995", + "description": "pan/bi: Allow vec16 in bi_print_swizzle()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "55cb921f9233acb098e06f95bf8cb2f10fccee1e", + "description": "pan/bi: Add support for derivative instructions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6914316e9af9f2bc9de78c13b78613a43727e0dd", + "description": "pan/bi: Add support for the CLPER instructions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3f8a7d9582f571527dcaa6ca08842f57efbfe292", + "description": "pan/bi: Rename CLPER into CLPER_V7 and add CLPER_V6", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f0e3de7e91aa2ad9558ba8e30247e4b0ac569521", + "description": "pan/bi: Expose FAU slots", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9f640c0f3dc4ef670f9a63cb30e5b5766ad3be6b", + "description": "pan/bi: Store the architecture in the compiler context", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "0dd093a89fea15303e98ef70b0de8370d7e39fb5", + "description": "panfrost: Get rid of the Pixel Format descriptor", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6e069d1c3febebb7996240ad5ce76b352ea2fe8d", + "description": "panfrost: Fix panfrost_format_to_bifrost_blend()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8379ff292c4544bfce22bb795311bd180a6c999c", + "description": "pan/bi: Force BLEND src0 to r0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8be54474619ff36230a7ed5f38a6bb3dc7f77a5e", + "description": "pan/bi: Extract shadowmap comparator", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4d727ee9131ba8783e14a1cff3cb2c1ee3800b2a", + "description": "aco/tests: add some more clamp combining tests", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "15d08a06e28f0b7d674b28aee364f24481851604", + "description": "aco/tests: expand optimize.const_comparison_ordering tests", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6bf3c606be627d144bb9ef8e45ea1039e9b59241", + "description": "aco/tests: initialize debug function", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "966732e8cae21ff7f5385cb776845acc3f6821ff", + "description": "aco: disallow various v_add_u32 opts if modifiers are used", + "nominated": true, + "nomination_type": 0, + "resolution": 3, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"91ffeed88a4eccfb92bd1cc4a5a3169129128432", + "description": "aco: fix combine_constant_comparison_ordering() NaN check with 16/64-bit", + "nominated": true, + "nomination_type": 0, + "resolution": 3, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d4c821da0e5924c6604b63dc923b3e8541e1f3aa", + "description": "aco: don't combine precise max(min()) to med3", + "nominated": true, + "nomination_type": 0, + "resolution": 3, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6e7e208867ecb425a8d18496fbfadc64cf6edee8", + "description": "radeonsi: remove AMD_DEBUG=zerovram flag", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b9605f1a745c6b599739473aeb26b302d29931b2", + "description": "radeonsi: remove unused NO_RB_PLUS flag", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1cf1ece738c5e533ea95a5c9b34441124de1e560", + "description": "radv: add img debug flag", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dc93fd759a5571c88304cf99d8e4028fa529c590", + "description": "radeonsi: use ac_surface_print_info in si_print_texture_info", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "92470b3d74743fe177e4d212278e3f7deced97bb", + "description": "amd/common: introduce ac_surface_print_info", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ee657df09a966678dc5245c5685cab0aaba09c0f", + "description": "meson: verify that d3d12.h exists when building the d3d12 driver", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "2ea15cd661c8355e8e35624eba0bf10cbcd57f61" + }, + { + "sha": 
"314f18b22ab4c2776da8e08a1e0913db14ce6e25", + "description": "microsoft/compiler: correct typo", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4c82cdcb7e8ec71f96b2a040e046ff798f4d2d62", + "description": "microsoft/compiler: inline some struct-declarations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b9a99b22aa1044155e40030b265f7ba49dda48cf", + "description": "microsoft/compiler: move c++ higher up", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a2a35b2d20113425d42a4bc3f26eafebc88cce73", + "description": "microsoft/compiler: remove unused struct", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "68488fd3835f2d1a63562ae16777987af3ff7e01", + "description": "aco: optimize v_add(v_bcnt(a, 0), b) to v_bcnt(a, b)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "580fbbb59a601c5eb858a53a4924c87da7e253f9", + "description": "driconf: remove the redundant glx-extension-disabling options", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3fa3bc19a28347c67411d494cf286c54b8dedf85", + "description": "radeonsi: Add auxiliary plane support.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "605140e4016108b6ac1ce2ca5b55baf8a1ac418f", + "description": "radeonsi: Do not try to disable displayable DCC with modifiers.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "23b59b6f87c6bffca62c2c78cab9881d8ae75425", + "description": 
"radeonsi: Do not disable DCC when we have it as a modifier.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c786150dfa514c35b0c384cc5184282d58c1f8bb", + "description": "radeonsi: Add modifier support.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f7a4051b836294ab8f55c71d239e99c5cbf63170", + "description": "radeonsi: Check pitch and offset for validity.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "395dac7bf9888db1daf425778f08cbef1ba26d7b", + "description": "amd/common: Add modifier tests.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0833dd7d124bfd94161bff15e5b4293987920fce", + "description": "amd/common: Add support for modifiers.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2cc2b456889dd2f1374018beea0afe53d762ec30", + "description": "drm-uapi: Add AMD modifiers.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d4f7962d48b46d34319f75bba03fad22c30efdff", + "description": "radeonsi: Add displayable DCC flushing without explicit flushes.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3e2dcb3c076963e432ec2771e7ac1cf965de512b", + "description": "amd/addrlib: Use signed char for INT_8.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9acfbe302265122333d097f51b4eac89ecd0ce24", + "description": "radv: Do the sample check for tiling earlier.", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dad6b625768f2b68e1ea5c31da4e0e1415ef41bb", + "description": "turnip: Fix file descriptor return.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "cec0bc73e550859635a71d330625bccdf71e87ee" + }, + { + "sha": "fe3b5241a48674e21bbbad950a7ff1ac9ac49494", + "description": "radeonsi: enable GL_EXT_demote_to_helper_invocation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aa757f4f8c367f408bc02b0daf4787b61dd899d6", + "description": "ac/llvm: fix demote inside conditional branches", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cb20d58f45c22d48208805ed7df8b98aee525189", + "description": "nir: optimize nir_lower_discard_to_demote to lower discard/demote both ways", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d5039f99b4f14c3c5628d2e44ea7ba3bfa132668", + "description": "nir: gather shader_info::needs_all_helper_invocations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "baa5807e363d3cb9906f2b123d17ae7e4b7040c1", + "description": "nir: rename needs_helper_invocations to needs_quad_helper_invocations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "96c12b7dc20d05dff94a947851f08d9ccbfb72ad", + "description": "nir: optionally shuffle local invocation IDs for compute quad derivatives", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "99e17b0c4adb81f93ba9b98b754ac71f6f334c3c", + "description": "radeon: fix license in header", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0b4825c872558b1e21f6b9c6e8256482988ee243", + "description": "gallium/draw: Fix rasterizer_discard for wide points/lines.", + "nominated": true, + "nomination_type": 0, + "resolution": 0, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9edb6e1be0f8ac65496e1f0da6c78376d81d70ff", + "description": "mesa: Fix vertex_format_to_pipe_format index.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "e6448f993b157a6d3757b69d5d4c2424e8af737e" + }, + { + "sha": "6e6dab479907fe79ed24e69be841dd3ec7bd479e", + "description": "nir: handle float atomics in copy propagation pass", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "8424cd8fbd1671c4c13f57cfa34bf8145d0fffcf" + }, + { + "sha": "8de279f8dbc3403708e28dbdcde95005e697fa2a", + "description": "freedreno/drm: Add some locking asserts", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9eb2ae5d21f005c7e89037d4099d537527c3bedf", + "description": "radv/winsys: set has_dedicated_vram in the null winsys", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6359d2dc32d625319921d7232197764907bb5a4", + "description": "nir: Fix nir_validate fail after nir_lower_tex", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "53660e4c4ed315d4b2fe5d921861fbfebf2bbe42", + "description": "Add EGL xcb platform", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8bb1a75b4f0af2d27d92c14ae94dd03c78e98960", + "description": "anv: fix a build race between generating a header and using it", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5d2e9d76c1dfbbeac769a42ecd5ddb2244005b91", + "description": "d3d12: fix code after simple-shader helper changes", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "4e9328e3b6a917c2a2b27630a25fbc44f822a6d0" + }, + { + "sha": "5f999625407e8845cbd919b0b43b647e8e410e97", + "description": "zink: fix build on windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "db9d13b4ffd35cbc8ecf90e1b930fe6b1392275b", + "description": "aco: optimize v_add_u32(v_mul_lo_u16) -> v_mad_u32_u16", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "20e48551acff92f07109630e134f59c81107e6bc", + "description": "aco: select v_mul_lo_u16 for 16-bit multiplications that can't overflow", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7028e9875f88d4d60aeb0e3bdfe7873fef5678fc", + "description": "aco: select v_mad_u32_u16 for 16-bit multiplications on GFX9+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bbdafd6ab3541d2a61fe3c2389f6dec55814c34f", + "description": "aco: optimize v_mad_u32_u16 with acc=0 to v_mul_u32_u24", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0ea763a727d671fa7dab2e37b7592fc045a53396", + "description": "aco: add a new Operand flag to indicate that is 16-bit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bda35ae6b92caf24a26c96e05a49f1bac42955cf", + "description": "aco: introduce a generic label for labelling instructions", 
+ "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1a5400a9e9568add74dac093c71dd59eefc1cb42", + "description": "util/u_process: implement util_get_process_name for Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d442a99238cdbee1ee3c5d069949699936d33597", + "description": "ci: sort packages installed via apt-get", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "87fb1ec35279470f692bdda03c6868186db96e07", + "description": "v3dv: move authenticated display fd acquisition to swapchain creation time", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fea89f4fedeab7dc02553e0bddd3b8b428fa8c18", + "description": "v3dv: fix typo", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dfd878f2badac012a1ccdec28da9795d893d337c", + "description": "aco: combine more s_add+s_lshl to s_lshl_add by ignoring uses", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "64748a2be29de11cf1360c4d4b42d491e88f02e9", + "description": "aco/tests: add some tests for combining s_add+s_lshl to s_lshl_add", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f89b29f708cbc8140f3a5f4520f5dd4e5d402e0d", + "description": "prog_to_nir: Revert name initialization change", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "eda3e4e055e240a14c6ad4bdbde544c6348fc01d" + }, + { + "sha": "2009258796af63477b0680d74cfc55eaf4ef8558", + "description": "intel/compiler: Fix passthrough TCS regressions 
from program rename", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "eda3e4e055e240a14c6ad4bdbde544c6348fc01d" + }, + { + "sha": "50175716d26273d273b54627715465e74cb45ae3", + "description": "microsoft/compiler: Add dxil_nir_lower_16bit_conv prototype.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "b9c61379ab4c5065d624fb9403c1df9d5589b313" + }, + { + "sha": "c432d4814ad7ffadb89b82defd49685c211b3515", + "description": "glsl: Fix typos in comments.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e7aa3cf8283088809625cb84fa78f1d301f35311", + "description": "glsl: Initialize ir_variable member field data.is_xfb.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "904dcfd914ccacc4d7b834a8aa1a5699eb51d10e", + "description": "draw: Clean up single-use goto statements.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e9caba6ce54d35870ef2fc555d00e2ccb45e40c0", + "description": "intel/fs: Fix use of undefined value in fixup_nomask_control_flow", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "a8ac0bd759cbf9a5984df4bc9f553a3dca41a8ab" + }, + { + "sha": "dd34d28de72b590c6b9a425f7829210bb8d2e2f1", + "description": "ci/db410c: Fix networking so we get artifacts from our jobs.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6ccaca36e8961e4c96810352a5c5dfead61cc0a", + "description": "ci: Re-enable the clip_three test on non-freedreno ARMs.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"c189d385ce306cd776f2e625fa955c1aba01871a", + "description": "ci: Bump deqp to current vulkan-cts-1.2.4", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6bc35c00e2245ba13dceeabd4a6bf75e4415812b", + "description": "ci/deqp: Allow specifying the caselist fraction separate from CI_NODE_INDEX.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2998a0b05525640dfcde7cfc0ecfc3ee39993388", + "description": "ci/freedreno: Group the short a630 dEQP runs into one test job.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf576b449e1d5cf685bfb5a14b05d1ee03d32cb0", + "description": "ci: Only install kernel modules for LAVA devices.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "60c5729d1669f9d91cc77d71f8e1893f2c0d0c94" + }, + { + "sha": "737d2b704b89e98caee6d6a546090174085a8baf", + "description": "ci: Move the rust cleanup in lava_build out of the middle of kernel build.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eda3e4e055e240a14c6ad4bdbde544c6348fc01d", + "description": "nir/builder: Add a name format arg to nir_builder_init_simple_shader().", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5f992802f5130352e903218cf3541e429b87cae2", + "description": "nir/builder: Drop the mem_ctx arg from nir_builder_init_simple_shader().", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ef5bce9253e2221ef8df0764447cb02c02fa2a16", + "description": "intel: Drop the last uses of a mem_ctx in nir_builder_init_simple_shader().", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f372572a198d5e5762c74d06ebd29666da3206b", + "description": "nir/tests: Simplify the mem_ctx setup in our unit tests.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5b9c7586f4f6bea692b9b1bb13d3c88ebb632435", + "description": "nir/builder_tests: Drop unused lin_ctx.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4e9328e3b6a917c2a2b27630a25fbc44f822a6d0", + "description": "nir_builder: Return a new builder from nir_builder_init_simple_shader().", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1aa1c1aec2ab61e0fe0e7d32b371e85c96e3d6f4", + "description": "nir/algebraic: optimize bitfield_select(a, iand(a, b), c)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "68d70fb96996c41a71ed81b7f6ccbec0fb95145b", + "description": "gallium: Add format modifier plane count query", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6ee10ab3de8664011a6c75e2357faba1ec95c094", + "description": "gallium: Add pipe_screen::is_dmabuf_modifier_supported", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "40e3eb9be96ec69259a000d08fe4a853a330007f", + "description": "mesa: Undefine ALIGN macro before it is used as a function name. 
Issues on MacOS.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d6a8a6ed4ec659068eedd61d0e10f49a78bd5a21", + "description": "v3dv: VK_KHR_display extension support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d186766c08867d5447e32c427ff092612bd4ba92", + "description": "llvmpipe: just use draw_regions in draw/line setup.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "01c4bac36e767dc96aef87fa7445a34c7c8f9132", + "description": "lavapipe: disable SNORM blending for now", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a04a146560717014937d9e13ad24eb33cb6f0a44", + "description": "lavapipe: enable alpha to one.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ea034c981b9649c4999e3b7f4164f27213e736b3", + "description": "u_blitter: port radv 3D blit coords logic.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4b1d23b24343dbb4e6c579502c712d86ba07903f", + "description": "gallium: handle empty cbuf slots in framebuffer samples helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "46d2f2224fd6129aa44b8f34c593ad74520681fe", + "description": "zink: only add MESA WSI structs for specific devices", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eba97645c9f22c890d7b413dbe81b532c04d99fe", + "description": "nir/validate: Size the set of blocks to avoid rehashing.", + "nominated": false, + "nomination_type": null, 
+ "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2afdd94f86149295f3e9422672c4501092f671d6", + "description": "util/set: Fix the _mesa_set_clear function to not leave tombstones.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "5c075b085585b1629022d40ce7af9448c7d0e906" + }, + { + "sha": "4618ca82c37345a5823f5f59022c650f02cc6c0e", + "description": "util/hash_table: Clean up the _mesa_hash_table_clear() implementation.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9c5422c7dd6e3d272075ac3dc7e726b7764159a7", + "description": "util/hash_table: Handle NULL ht in _mesa_hash_table_clear().", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3a512c34d2fb933a51cd22613833b456c402755b", + "description": "clover: add support for opencl C features", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f1b91177fbdc97289e9ddefa8db4b18fdc88f8f0", + "description": "clover/spirv: avoid strings for version handling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "330c52476292d2cfba15499f2f74ca224f227260", + "description": "clover/llvm: don't use strings for version handling.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b8a96199cbf0a81011c35c594becae083626206c", + "description": "clover: add platform supported extensions with version", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "940e479d7d312e8a919c09958e07b9feccdc6a33", + "description": "clover: add support for versioned device extensions", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3a9fa5b36d7b114c5ec15343edfacf8c076a45e4", + "description": "clover: report device CLC versions for 3.0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "54449180982bdf644aef11bc59aed313edcf17f6", + "description": "clover: add CL_PLATFORM_NUMERIC_VERSION support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "019130ed71604827b385c5dba5cbc879609880e2", + "description": "clover/platform: move versioning to core object.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2a3a0322aec1024e67d67c27a7bb5cbecbf34957", + "description": "clover: add CL 3.0 CL_DEVICE_NUMERIC_VERSION support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "981f8d7252c17672598ec7ee4406e296876f8ab0", + "description": "clover/device: store version in device at constructor.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "297ad1bbb310a580b888b8f38488f50740b5df90", + "description": "util: add a env getter for versions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7507ce1db41239b6f986bc6fb94ccf9c68b88acb", + "description": "clover: rename platform/device apis using strings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "20a3ec2d771d7f8942018b714c64925a870a2062", + "description": "clover/queue: Flush automatically if applications do not flush themselves", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e8a1aa9e940af5706fc78d77c4cd509c2f480a6b", + "description": "mesa/bufferobj: Fix valgrind complaints", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f5ea96b5848b3b024139728c0c11ed6d27344ee9", + "description": "mesa/fbo: Fix valgrind complaints", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9cef302aa04fb7cb23729d269ba13ecbe6b33adf", + "description": "util/threaded_context: use driver's ubo alignment for constant buffer uploads", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cb034ae44f4a1f3ed036ba71fc42746efb4cd775", + "description": "freedreno: Protect gmem_cache ralloc allocations", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "91f9bb99c5e0db38a1ac26dd9e6a967312f5e8b0" + }, + { + "sha": "13d509c7e66439e3e85d24f3326c037a47d0ffc5", + "description": "freedreno/drm: Rework APPEND() macro", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2625ba064c44c460610c5562e1c42b01c8befd7d", + "description": "freedreno/batch: Cleanup submit immediately after flush", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "06b918153db0a6067f7584f7b75e84731d14b058", + "description": "freedreno/drm: Drop growable submit_bos table", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "02298ed1fc640f89d66c1e22e68e1f9fecc14205", + "description": "freedreno: Add submit lock", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "290f827928b3845bb50e4bd2f27b878e136af36f", + "description": "freedreno: Make fd_context_batch() return a reference", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "20a813b0eb5e3feb01158acb866ad1e6c6717830", + "description": "freedreno/batch: Move fd_batch_get_prologue()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b2f4bf010574c501f93697d90f182a9bc6c83356", + "description": "freedreno/drm: Make ring refcnt atomic again", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b1b8e0f8d158758ea9756e226ea807ea296cb787", + "description": "freedreno: Use ctx seqno in batch cache key", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1783a156c806829f6a9a2d6426a009e1340eb66e", + "description": "freedreno/a6xx: Texture cache locking", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "51665dee8c79501a1ccbed2766b3bbaea49b2b39", + "description": "freedreno: batch-cache locking", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3f88abd621ef60b7d0ed8b82445d630abed6a810", + "description": "freedreno: Fix spurious flush", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "156d7e45f745a1567fc3b69a4f6faa3c54eec49e", + "description": "freedreno: Convert to mesa_log*()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a8b9860a974117771f79faa6d4f7c9af0e585d4f", + "description": 
"freedreno: debug cleanup", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "78b3f58c9985d14d11e580277cb5b2cf61234e21", + "description": "freedreno/drm: Convert to simple_mtx", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "57a2a5db81c614ac84c300c200639c51f218fb8a", + "description": "freedreno: Drop fd_context_lock() and friends", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0d007349f910970ac14b20415fe76a84fd0f8fd5", + "description": "st/mesa: fix use-after-free when updating shader info in st_link_nir", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "549ae5f84375dfadb86cfd465f0103acfae3249f" + }, + { + "sha": "6f3716d677981bc2ac8f9842f302e9c125203bc1", + "description": "d3d12: avoid searching twice for bos", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bd5f92850614bc984a7e560849b19b31e94725a0", + "description": "d3d12: ensure all compoents of clip-distances are written", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2ea15cd661c8355e8e35624eba0bf10cbcd57f61", + "description": "d3d12: introduce d3d12 gallium driver", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3f31cf64e49d9de57ea7dc2b0447840255bbca9e", + "description": "microsoft: add resource state manager utility code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b9c61379ab4c5065d624fb9403c1df9d5589b313", + "description": "microsoft/compiler: translate nir to dxil", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dd5fe2f3de420778555f1211836c5ac076257f1c", + "description": "microsoft/compiler: add dxil-util code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "10dfd3cfb4bff1a98e6374d93110f039b4cef621", + "description": "compiler: add SYSTEM_BIT_FRONT_FACE", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "07d189c4169623f3a2b32f4f62d82d860457e30e", + "description": "gallium: Wrap some header files into \"extern C\"", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "449c4baf5011335484013244069a878de1f2188e", + "description": "nir/print: print GS extra info", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e60df9fe1696ff0b533eaf09d0b06720ce373352", + "description": "util/format_zs: Add C++ include handling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "51f4a4bee10ebd6f526fccc4c02356c08be81747", + "description": "gallium/util: Wrap suballoc.h into extern C", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f4ff66d75f529639030747782245a52c0ac50632", + "description": "util/slab: allow usage from c++ code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c88e314a2d4355df99dd0e7dc674042ff883042b", + "description": "gallium/nir: Wrap tgsi_to_nir header in extern C", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + 
"sha": "7a1346b26a70a9d6000f4889ab4d594a6a445d6d", + "description": "softpipe: correct signature of get_compiler_options", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "73bafb5ee38964019c3da2000095608a7fb4b554" + }, + { + "sha": "f23574af2c65b9c0342656cb543bcb025cb38d9f", + "description": "panfrost: Fix ->reads_frag_coord assignment", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "f1de952b695b8f2053d576d3e8093427e598bd4f" + }, + { + "sha": "35ae9408f2a6b2f6f307562e8adc33af95406f12", + "description": "panfrost: Fix Bifrost blend descriptor emission", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "edd98aac3f16f216fa157f031f4c10fc294e7c8e" + }, + { + "sha": "7737ca75391d5ea930390037178a277d309a83af", + "description": "pan/bi: Model writemasks correctly", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "b2c6cf2b6db11eb2293f59b42dfeb3d7481477b0" + }, + { + "sha": "e3893ee2043dc5499407df71e2e0c0a822b416d9", + "description": "intel/dump_gpu: add support for MMAP_OFFSET ioctl", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "44f2de5286c646f9da1fc26af3780e44184aad3c", + "description": "panfrost: Fix AFBC blits of resources with faked RGTC", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "acb8dcfebdde56a748ec50a3fbecf27912974be9" + }, + { + "sha": "752f6d806541ac38594f3adb1e45d63709506e2c", + "description": "zink: setup version dependent VkPhysicalDeviceVulkan*Features and VkPhysicalDeviceVulkan*Properties.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "19b181f0f53ad87528c838bf4416261ee68cf6cf", + "description": "rbug: Handle 
non-TGSI shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "44875361c88754cb53172205e2657e487d6e6fe8", + "description": "rbug: Forward get_compiler_options to pipe driver", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ec347ee9bc41f99dc8e398c652d873cc192bc99c", + "description": "aco: fix combining add/sub to b2i if a new dest needs to be allocated", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7004548bdf28f47fcc0d2fc1c96457f1a15bc4f1", + "description": "turnip: Remove pipeline NULL check.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f8844d5c729dbc73e63f75f1c7181c7227a75189", + "description": "swr: Initialize FetchJit member mpFetchInfo in constructor.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a8467191c395dc9a965e8ccd78ac428595cb6b89", + "description": "Reset new features for 21.0 development cycle", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a706bac098f52647c97c29bdce88469cdfc47e55", + "description": "Bump version for 21.0 devel", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3c4e43e72be3bcb70d7cfb9abcaf77d4c0c75f57", + "description": "intel: Pointer to SCISSOR_RECT array should be 64B aligned", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a1a365e8188bd72be27404dd669cad90468bc09f", + "description": "broadcom/compiler: Allow spills of temporaries from TMU reads", + "nominated": 
false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1c5271346af77724f0462d1acafaa49142569006", + "description": "nir/algebraic: optimize bitfield_select(a, b, 0) to iand(a, b)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d47969eb5ed87dcd2d16224f40c82c34c87b5c3c", + "description": "pan/bi: Add support for load_instance_id", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5cd1d8c1ed9b5d9da83900074e983c3e4e0aa3ff", + "description": "pan/bi: Add support for load_vertex_id", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "255f7842c7c9c8fc521db2982d8869c6b5058eac", + "description": "panfrost: Allow linear ZS resources on Bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4995a4c03aa706a1332177e3aa49898b29ddf1f6", + "description": "pan/bi: Add support for ushr", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "af70987b36364c5c6447d7bf66dc06b8e31886ff", + "description": "pan/bi: Add support for ishr", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3257ad21f34cfa82d32555983f45db1b70788eae", + "description": "pan/bi: Fix ARSHIFT definitions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2a80b2d0cd1db54522fbf07a202e546724876bfe", + "description": "pan/bi: Move bitwise op packing out of bi_pack_fma()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"cc0950722c03fc3a4f1f6fb709c71e23b4d264e6", + "description": "pan/bi: Get rid of bi_emit_ld_uniform()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fd265fa0206c98baa04ddbffd6c1b3e55f53fb3a", + "description": "pan/bi: Lower uniforms to UBO", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "09da82cbdc3a49a41cbb24b86a9516572b97f4d1", + "description": "pan/bi: Add support for load_ubo", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "87e2169cb90fd33f432cc2a09bef3eb86880ecc9", + "description": "pan/bi: Fix swizzle handling in bi_copy_src()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2522f509a3d8100bd8694fa4f4a9d55e254ae289", + "description": "pan/bi: Support centroid and sample interpolations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ca5a00a70cabf7c77ba0c18bc3760dd21afc91c2", + "description": "pan/bi: Extract LD_VAR sample field from ins->load_vary.interp_mode", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1692088d05386cf4ce22c557abe425dc95cbc16c", + "description": "panfrost: Expose GLES3 features on Bifrost when PAN_MESA_DEBUG=deqp", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "23dbf7964b7df0bc10bdad03ea2ac99b2b2d48f5", + "description": "panfrost: Force late pixel kill when depth/stencil is written from the FS", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"956228da3a11a83d938c929415efb375ffc2ca1f", + "description": "radeon/vcn : Corrected dpb_size calculation for VP9_2", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "68092df8d8872bffb07dbd21d1d58733651dc97c", + "description": "intel/nir: Lower 8-bit ops to 16-bit in NIR on Gen11+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b98f0d3d7c3b85001382eadd7dcfa3e11de64ca5", + "description": "intel/nir: Lower 8-bit scan/reduce ops to 16-bit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3ad2d85995eccc5b79732f93a2ced4800275b19b", + "description": "intel/nir: Refactor lower_bit_size_callback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f95665cfebbd943ca3795c026e3dabf4af3f262c", + "description": "nir/lower_bit_size: Add support for lowering subgroup ops", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2c4b47184da56dc9e42fd8c91a08d620d9627020", + "description": "nir/lower_bit_size: Pass a nir_instr to the callback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "15c6e05a7257a85c4763d28743414011ad59bac4", + "description": "nir/lower_bit_size: Don't cast comparison results", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "86ef139bf46aa9d3a2e8bec927cdca126bd0fcfd", + "description": "radv: implement VK_EXT_shader_image_atomic_int64", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"9f4326877267425ea3b181d9669987dbec98d395", + "description": "ac/nir: implement 64-bit images", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5b81e80fb6c0ffc134778ff83a0cd7f8404c7f6d", + "description": "aco: implement 64-bit images", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8570de72f513a3e20016c75b8647704c2f14b011", + "description": "amd/common: add PIPE_FORMAT_R64_{UINT,SINT} to GFX10 format table", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cff195087f539264ef24b4b459d554401261179f", + "description": "util: add mapping from Vulkan to Gallium R64 integer formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bcc3d53587a50a22597225dce878d8d3bdeeaa67", + "description": "gallium: Fix NIR validation when lowering polygon stipple", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "441feda0bb374e551a59af24111d3574d9adc948", + "description": "gallium/util: do not pass undefined sample-count", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "cf170616daa6e1a4545c2bbfd35c9b857fdcf2f6" + }, + { + "sha": "bae5487659636fbbb79021f89ba01ba32e4a3abc", + "description": "aco: optimize v_and(a, v_subbrev_co(0, 0, vcc)) -> v_cndmask(0, a, vcc)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2bbe01b186b943d24e2cc74098959b322a77e9f8", + "description": "spirv: Add support for SPV_EXT_shader_image_atomic_int64", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + 
{ + "sha": "5a3e22018d11234bb83c24e153c6c2649aed678f", + "description": "nir: Allow 64-bit image atomics", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "79f477c3c609131ee6c1be717ac1ed2ae7b0f52f", + "description": "compiler/types: Add 64-bit image types", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d44c76be85f4017159f3deb8a43eb48a8cdb7f69", + "description": "util,gallium: Add new 64-bit integer formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b725fbd1913efc9234736d051a350ef36ae8c1c3", + "description": "nir: Validate image atomic formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "72f1c9aef5d586ea18eb14388b59b9c8c3ce4dd9", + "description": "nir: Print formats on image intrinsics as text", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d22fafa20b704a0108b71b0c104e43e10f1ce0c2", + "description": "spirv: Update headers and metadata from latest Khronos commit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2acdcf0b3174818cd1728e35782715db86cccc96", + "description": "libgl-gdi: support building without softpipe", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d7f7d890f152d84a0bf3f9cdf8294eb18b9ee563", + "description": "panfrost: Add missing Collabora copyright notices", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "14bbc24b60cdd5eb17b3f2798fddf483edd92b00", + "description": "pan/mdg: Add 
missing Collabora copyright notices", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "92374aebe2d1023406a11bd6467ee4403d4e5bd2", + "description": "spirv: correct sematic-typo", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fc2814417e9961883feb079e0eef3d4d1705d2d6", + "description": "CI: Disable Panfrost T760", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4143572f93ecf8a4c20d02b42d14adc275b235c0", + "description": "radeon/vcn: Bitrate not updated when changing framerate", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "25066eb20a23c023ed7cdeb99f7fed25c0294289", + "description": "v3dv/device: do nothing when asked physical device pci bus properties", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ca1969ca880bf246bd2571af2e31b0c93aac2d2d", + "description": "v3dv/util: log debug ignored stype only on debug builds", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1fa4a372568ae9666a00ba01cd6ef8a692aab8db", + "description": "v3dv/util: remove several logging functions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1675c83a4fc82439b33fc07599b57f37e707314b", + "description": "radv: Fix exporting/importing multisample images.", + "nominated": true, + "nomination_type": 0, + "resolution": 0, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fb1793bf9c89a4b0c8b475f3fa529ed70a3e6866", + "description": "zink: add some spirv builder functions for barriers", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "95e15f774dce2939d19f76a115bcd2d960724fc5", + "description": "zink: add a quadop function in spirv_builder", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "758e932ad27921c77ec1b163d45f3d07d8d336e1", + "description": "zink: use same function for all pipe_context::delete_*_state shader methods", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d377d844969b18264d3eeeeb1bbdf37131c69791", + "description": "zink: make physical device functions use a dynamic function pointers.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "91f9bc18e0cc3d608098b8d7e154cd245711bc90", + "description": "radv: Fix budget calculations with large BAR.", + "nominated": true, + "nomination_type": 0, + "resolution": 0, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9948ead3cd495887eec37f4ac4613541c5f510b1", + "description": "radv: Skip tiny non-visible VRAM heap.", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b7793e39ee40727f21c58b72a4b4516b8b5cc948", + "description": "docs: update calendar and link releases notes for 20.2.2", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e053a02ee9702f42d9494ef760487797a7b8d1f5", + "description": "dcs: Add sha256 sums for 20.2.2", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5e1f518f30b1c78311dd96756416afd3f8d3baca", + "description": "docs: add release notes for 20.2.2", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "39d1576eef0cd1628c0afa8b778eea686d3035b7", + "description": "st/nine: Remove unnecessary NULL check.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0bc222706d3ba1ebddf9e9b90197a4fea5f71139", + "description": "zink: implement ARB_texture_query_lod", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf29daa1b5a33ce2cc62b7743b4bf7220c7b8539", + "description": "ci/deqp: Switch to a new dEQP runner written in Rust.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fe61230b38f9eeed590b8e401a716c68796bf373", + "description": "ci/bare-metal: Reset colors at the end of a line of serial output.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ff6741728de7110fa67868ceaf716082139f374e", + "description": "ci/bare-metal: Apply autopep8 to the bare-metal scripts.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9f1cd99ba12de7605116ebe0a056c9f501024222", + "description": "turnip: Fix image size for 3D vkGetImageSubresourceLayout.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a0b42da0a2e5a43d8e88605f3678034ad1a2e8a2", + "description": "spirv: fix GLSLstd450Modf/GLSLstd450Frexp when the destination is vector", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "81597759beff0729057887340e219e87448d1a4c", + "description": "zink: For MoltenVk added vkFlushMappedMemoryRanges() to vkMapMemory() to fix empty mapped memory.", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f0bbd8fdd0871ca9afdda107764818026bce13fe", + "description": "zink: have_triangle_fans support.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2aca3749c572957b3a9e6518327444b9ab8fd0e5", + "description": "zink: add support to device info for macro guards and just VkPhysicalDevice*Features with out the have_.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f0a88dbefa640ddee10b475bdf01b050d998c91c", + "description": "nir/lcssa: consider loops with no back-edge invariant", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "feb9462bb19096ecfcc11b42d5474c6309594cd9", + "description": "zink: Added inbuilt debug logging from the VK_LAYER_LUNARG_standard_validation layer.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "10d44fec2dd353c2926ae882af47df3ec3b10ea2", + "description": "zink: fix pNext chain for resource memory allocation", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "92022f2846e009527a898c8618864766e75d8e50" + }, + { + "sha": "0b53ca0cca9381ad63a34753a3f678397ca56aac", + "description": "zink: return fail if create_instance fails", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6a96729448738476ddb126692311d3b0af7ec4a6", + "description": "docs/features.txt: VK_EXT_separate_stencil_usage not exposed on RADV", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "3d58ab7576dfcc7aa84407c1af60f6acc21916fa" + }, + { + "sha": 
"6ec7e0d4066488dd8dbc90ee2f747809be1f1c02", + "description": "gallium: document convention for get_handle calls on multi-planar resources", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b29fe26d43874c90df60edbffda70fca67b98e4a", + "description": "etnaviv: rework ZSA into a derived state", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3c7fc95f4fe89ca0246e829bae622c0d16fdd47d", + "description": "etnaviv: expose shader discard usage in etna_shader_variant", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bff60d665b8f7f4e4997bf41cde00e29eba50e81", + "description": "etnaviv: update headers from rnndb", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "83c72cab7a13e2ce44e996bcf19a7e2af65daf50", + "description": "etnaviv: flush depth cache when changing depth config", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "01e2ded11f75dc26313840b92b86ab9156072060", + "description": "etnaviv: emit RA_EARLY_DEPTH on dirty ZSA", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "785e2707b0d181967ca8986346fa7482d4fbed0b" + }, + { + "sha": "d4285805345836cbe1a1a0f282aa3a25d5c0aa60", + "description": "mesa/st: use a lock to protect access to variants when updating them", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "77d6fda0f50218c2a37f9229a210ca749f824255", + "description": "nir/algebraic: distribute imul(iadd(a, b), c) when b and c are constants", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "2fd5079442cd901ceca8e67eaf7f1d1a83bace3c", + "description": "clover: implements clSetContextDestructorCallback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "04e1b1757cf3e6f870e2a3cc567eea6e99b4ad9d", + "description": "clover: add empty cl 3.0 dispatch entries.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "558325c4c50b530c7ba911cea1a275a33768004a", + "description": "clover/spirv: hook up spir-v environment for 3.0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7230f39c1a2e37ff20a560ba4b9872aa8e3cd456", + "description": "clover/llvm: add 3.0 versioning.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a52c613ea7dcc0622f701b4983ba6e52751f85d8", + "description": "clover: access 3.0 and deprecated 2.2 API", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9790fdf2cebb7cab657ee61e30f9d0b8860f0ddc", + "description": "vtn/opencl: add ctz support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "725df7c15dcb1cdd1e7e4caf3a908a8e8f6a6184", + "description": "CL: update CL headers to 3.0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "041e3147c056ebd9fe2b1cd8e5b1d87db3ea71f9", + "description": "docs: Add MESA_pack_invert and ANGLE_pack_reverse_row_order", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fd104096c61328933723ba807771f6862f1d42df", + "description": "mesa: 
Implement GL_ANGLE_pack_reverse_row_order", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4506e4db4d66694f2a2b5ea3c419a407131a04ec", + "description": "mesa: Enable GL_MESA_pack_invert unconditionally", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "923577c0662e5f3d0e50f76198eed1abc8d35e95", + "description": "v3dv: Remove unsigned comparison to zero.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "03683b9b2e697302c86abab80be702596f44a06e", + "description": "nir: Handle ray-tracing intrinsics and storage classes in copy-prop etc.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5a2889327970ce2e7f342c357efb50fa25c2a5e4", + "description": "spirv,nir: Add ray-tracing intrinsics", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "21b1b915498447a66d555338830183eb6eb97d8d", + "description": "nir,spirv: Add support for the ShaderCallKHR scope", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6b8fd65e840512e26137792c1854ce4795b5f1ce", + "description": "spirv: Implement the new ray-tracing storage classes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "84a8ca1db889564f2ca6bad9f65504615ef4080b", + "description": "nir: Add new variable modes for ray-tracing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aa4ea9c7ea6375fc2460092a6059fc2a32c56efb", + "description": "nir: Add intrinsics for object to/from world RT 
sysvals", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "07635a3284c511d464ea3e02ef9136f9b8661378", + "description": "nir/builder: Add a select_from_ssa_def_array helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "46cd91bb45aa99b1fd95466cbe83e02bb4f2c258", + "description": "spirv,nir: Add support for ray-tracing built-ins", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ed907e5d84cba4632320bad4dfc35462ca0dd927", + "description": "spirv: Add support for OpTypeAccelerationStructureKHR", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2df055ab47fdaa56dff740b1c7e27e88d549ea45", + "description": "spirv: Pass the deref type to storage_class_to_mode for non-forward pointers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2bef02696d40843eee714a619757a1ce87504a95", + "description": "spirv: Add a guard for OpTypeForwardPointer storage classes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aabe37b96944aabdba16460bdf96faa9beee29e7", + "description": "spirv: Remove a redundant vtn_fail_if", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3bcebe1b27197085ef9880c07996418514f25b11", + "description": "spirv: Add Ray Tracing execution models", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d8dbdf20de840d81f8bc5e12a5ded0a423bb2a5f", + "description": "spirv: Add basic plumbing for ray-tracing capabilities", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f65182931dbcae545034ae7230bbcdcfea7d91da", + "description": "compiler: Add new Vulkan shader stages", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1882a02d83b134877b9effa268da7ac5b62cb1dc", + "description": "tu: Make sure spirv_to_nir knows we support imageStorageWithoutFormat.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "e781cc702557f1102fc6253f5654696490742e06" + }, + { + "sha": "0b0f152c54eba919cf8851e6407d25eae4b8faa5", + "description": "nir/clip_disable: handle 2x vec4 case", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5e43ba39e112e702b8f8060101870acbd8163b79", + "description": "nir/clip_disable: try for better no-op", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1d23a88c6eb7502ea38e3e9d1a28b7df49c78e29", + "description": "nir/clip_disable: write 0s instead of undefs for disabled clip planes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c84ae1523e03361e27f592f0a398835e35a01769", + "description": "iris: Move blit scissoring earlier.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0f96a9ab3b094e1cc2534779bf3a5c6f1d49177a", + "description": "anv: restrict number of subgroups per group", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b2c719308c5d691b84d3ac7dfca9f9704b5f74bd", + "description": "turnip: enable VK_EXT_image_drm_format_modifier", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f624692a57d3abc98b8e8f22a24ba014781ae0ff", + "description": "turnip: don't always fallback to linear for mutable formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8c4426f519c95f27c4842afea08571b82aace999", + "description": "turnip: remove unnecessary/redundant tu_image fields", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c64cd6988f1a1d00969472273a293cd0dd0bb629", + "description": "turnip: remove useless tu_image asserts", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dfaa8b9ae7200b34c9cb6f320731fbc869282cb5", + "description": "turnip: LAYOUT_PREINITIALIZED is not different for optimal tiling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "43c16483e0b02acf900a18bd281c26d1da9a74e2", + "description": "turnip: don't implement CreateImage as two separate functions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1a1099c54f1c09674a27e8a1ce196a05ee2fb467", + "description": "aco: Fix format string used when raising validation errors", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "c2b1978aa47c5b8876a589aa035a670d55e87c2e" + }, + { + "sha": "61d2badbf472bec3da16f5faca6f668d2164e101", + "description": "nir/deref: Fix a typo", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "df51518dc5b67bee7488a8b65c5f09653367fd63" + }, + { + "sha": "60c5729d1669f9d91cc77d71f8e1893f2c0d0c94", + "description": "ci: Distribute ADMGPU driver to LAVA as a 
module", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bb41acf96afb8f836da434174037d33485adb903", + "description": "ci: Update dEQP skips and fails for Bifrost on G52", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "80cbb324434558a07907b010e42edf2376b5b828", + "description": "ci: Update kernel for LAVA to 5.10-rc2 plus patches", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "092186d9857e81f5f4c4f3c3a8820cb2d1a57b9a", + "description": "util/threaded_context: use driver's buffer alignment for staging transfers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "60b9c00afd25d0294b508f490ae46389f6fdc3bb", + "description": "v3d: Add GL_ARB_vertex_array_bgra support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9648bfba5641ae8609a9638b3fa888d59b27e006", + "description": "v3dv: mark the right bit to swap R/B vertex attributes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1e723745dde45da4adafbb9a28d17850484fc5a7", + "description": "v3d/compiler: extend swapping R/B support to all vertex attributes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "44925a8a5547ba63a755668dfa947052f525ac8d", + "description": "intel/tools: add missing new lines to few remaining fail_if users", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c323d7c2a7f65fa2d9504e35b4e3be16d9e06a61", + "description": "intel/tools: refactor logging to 
be easier to follow by static analyzers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f0061277c00d269d4e1530c4d85de7184e793bf9", + "description": "intel/tools: handle some failures", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cd9907e7d39961e60bb68314139d6f74836b502b", + "description": "anv: remove dead code from anv_create_cmd_buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d13b7d659145e74525e85f5615607cce4bd03c2a", + "description": "intel/tools: allow --color option to be used without arg", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0dad1a7c72a88323533f89719c0217bb0e05066c", + "description": "v3dv: expose more features", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0b30336906f50ed6a96ca20cba336f6ea380f0c6", + "description": "broadcom/compiler: Handle non-SSA destinations for tex instructions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e331fd7fc42175aa7f466c96f1fcf42777909c9b", + "description": "vc4: use intmax_t for formatted output of timespec members", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c8630fd1143f055249a35877aa056878e56d00c3", + "description": "amd/addrlib: Add missing va_end.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "69ea473eeb91b2c4db26402c3bc2ed5799d26605" + }, + { + "sha": "fe52efaa98eae711cc6e7cd11f2d05471fff4b00", + "description": "loader: Print dlerror() output in the failure 
message", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5d5f3e3a4779f4efc41793498525fe611807b8e1", + "description": "intel/fs: Implement nir_intrinsic_{load,store}_shared_block_intel", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9fe158e1d102d93dbf3b8117aeafcfbaaf35070c", + "description": "intel/fs: Implement nir_intrinsic_{load,store}_ssbo_block_intel", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d372abe397316fd8f8e21111e87d925ceda42d56", + "description": "intel/fs: Add surface OWORD BLOCK opcodes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "296137df53aa0aad78277edbcd48ed09664210a9", + "description": "intel/fs: Implement nir_intrinsic_{load,store}_global_block_intel", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d3d2b73fa3c31b90a622592376585a415e41d3f1", + "description": "intel/fs: Add A64 OWORD BLOCK opcodes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eb03f29655cb6324ebf8016048fb9c21878eb1f5", + "description": "spirv: Implement SpvCapabilitySubgroupBufferBlockIOINTEL", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dd39e311b336dddcc7343c5119ce05af410d8dc5", + "description": "nir: Add nir_intrinsic_{load,store}_deref_block_intel", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b86ce274f9d189dbd2dac65ede5a2c14513787a0", + "description": "spirv: Implement SpvCapabilitySubgroupShuffleINTEL 
from SPV_INTEL_subgroups", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5194cbc76633e4458f1c5f1db388950cae0200a9", + "description": "iris: Flush dmabufs during context flushes", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c488fb6dfecc03aa7d3a603876c5e00937e8ea28", + "description": "iris: Fix fast-clears of swizzled alpha formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "60336cac6014824267de013718f339e68ed7c695", + "description": "iris: Fix SINT assert in convert_fast_clear_color", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4cb9b5db2ce9ba53ae96aa0571b298fb243c0a36", + "description": "iris: Fix fast-clears of swizzled LA formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "382451ff9d7bfa4f807e8ca447180642c91b92e6", + "description": "iris: fix source/destination layers for 3D blits", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "61ce544d070c58fe9823a06ba10319104074e1b5", + "description": "st/nir: Drop setting interp mode on system values in builtins.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9143c08125a33af56d4f05fd5161a572c0c6b4ac", + "description": "st/nir: Fix the st->pbo.use_gs case.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "57effa342b75a2ae681f2a7665925022dd6e4aa9" + }, + { + "sha": "f8c1d79f69a8438d96a8177a877249a7ac22ae37", + "description": "pan/bi: Correctly calculate render target index", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9a6dad18d8d716534a0b870cf0ac7a9b5b9d25af", + "description": "pan/bi: Lower depth/stencil stores", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6433fedcf1c09174b7ee6137775f5340ffa6120b", + "description": "pan/bi: Emit +ZS_EMIT as needed", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0c889e761124553c6398ddc6a165482c6c30e687", + "description": "pan/bi: Stub handling for nir_intrinsic_store_combined_output_pan", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f67b49d5123398b9e76babe0ccd3510c813e2c46", + "description": "pan/bi: Factor out bi_emit_blend", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "928f2bc0d575dde540aacc3f7664b2254f49423d", + "description": "pan/bi: Factor out bi_emit_atest", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f14beeb4ceeb4b7a07f989f6702aa317c9d7289a", + "description": "pan/bi: Infer z/stencil flags from sources passed", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2b1db3662f37a3f455857071335079c907810698", + "description": "pan/bi: Add +ZS_EMIT instruction to IR", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cd66aa712d9498b3c0d9a976d32e0bc487138a77", + "description": "panfrost: Deduplicate shader properties", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { 
+ "sha": "4a2ecc72d341cdde00b6ebdda3bc539d6ca47de0", + "description": "panfrost: Pass through src_type", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "42319c562642100a2c5d07dd8522f63e6ea6f0fb", + "description": "pan/mdg: Move writeout lowering to common panfrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dfaa4c51fbab842ad280a199e5bce87a2bcc2fcd", + "description": "pan/mdg: Deduplicate nir_find_variable_with_driver_location", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a05921b9f213461b0fc6af59a2409956addad342", + "description": "nir: Add SRC_TYPE to store_combined_output_pan", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fa869fb9b454e8dbdcb1b1251728a287452df5c8", + "description": "v3dv: add a v3dv_bo_init helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "456beb40b8f74d37017eaeb6d0ad0a7108b08345", + "description": "aco/ra: Fix counting of subdword variables in get_reg_create_vector", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "52cc1f8237d9ed0042578777af4b28e5b33c6354" + }, + { + "sha": "786828131a7c72ae1f9a21159255464ac7f4ae8b", + "description": "aco: implement 8/16-bit instructions which can be trivially widened", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ef95ba8cdd1491a945cebc1cdc12ca0f066afc32", + "description": "aco: implement some 16-bit arithmetic instead of lowering", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + 
{ + "sha": "8ed7cad75f37e22d30667f3fcf98421fa860e9ee", + "description": "radv: rework nir_lower_bit_size callback and run DA on GFX8+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b2ce7da14986bb4d0835894744ac48614c4da9f6", + "description": "radv: do nir_lower_bit_size after algebraic optimizations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c77114967f82985d53d12d89f87340bb76077e54", + "description": "radv: move a few passes to after load/store vectorization", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "475077c790feeb0b677bd91107ac87711ca468aa", + "description": "nir/lower_bit_size: optimize upcast of b2i8/b2i16", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4e5c85526b1c66a004d14e7b8ffa13b2bb0e706d", + "description": "nir: add shader_info::bit_sizes_used", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7d3df69914d015ffd397ec1349b06aaa0cf03973", + "description": "va: support VA_RT_FORMAT_PROTECTED", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "81be8b3c2f2785932b63d4b82ee33cc3d6ac2765", + "description": "va/picture: make sure destination buffer is protected if needed", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "70191f38e3278b75ac13ea16e71d0ffdbb7bf8d9", + "description": "frontends/va: Added protected playback support for VP9", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"f20a20b1e3f513e0075c87d3b406f95bd9ed7df2", + "description": "radeon/vcn: program drm message buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ebbf40fa263c455b3f2da11b2fece83c4342498a", + "description": "radeon/vcn: delay dec->ctx and dec->dpb allocation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cf324df6eae0fc42f02c541050737d294122087b", + "description": "radeon: add si_vid_create_tmz_buffer helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "024be710a72a3b20ae3c6bbe90c6bac790d81a02", + "description": "radeon/vcn: add defines for drm message buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b9753fdc581ac48f00b12fbc41e98853cd3d728a", + "description": "radeon: add decryption params definition header", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "deb7dc82f626e92525d01829d88f0ac348de03b8", + "description": "frontends/va: handle protected slice data buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5f0816eac0908bb7671231d483b7f7af1f474661", + "description": "vl: add flag and definition for protected playback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "58e7088628a362fe9d3163429a12e5f828b5b200", + "description": "nir/find_array_copies: Don't assume all children exist", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "9f3c595dfc4cd1745e10698f0c037a7f32b63789" + }, + { + "sha": 
"e1736c7fdb203b3511db0d580f1af99b51f4b68e", + "description": "radesonsi: Remove unnecessary shader->selector NULL check.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5fb77a582a82e6e009f91c2bc74bb6b138afd657", + "description": "lavapipe: request correct sample mask behaviour", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "47701d47e7919b369d19d5a0e234d92c8eb01476", + "description": "llvmpipe: respect the sample mask in non-multisample flag", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3dc6da1ac1f5be2d6421940739b88f7e8d36cdf9", + "description": "gallium: add a non-multisample sample mask out behaviour flag.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4ff4d4e56966a40bff83d1813e3a308cff38736e", + "description": "nir/opt_intrinsic: Optimize bcsel(b, shuffle(x, i), shuffle(x, j))", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f5b56ae237b9826e0fec32bfbfd28e8bcc4da78", + "description": "nir/opt_intrinsics: Refactor a bit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3b281861c1acd9d0815a63cfa3620dc63ca81293", + "description": "nir/constant_folding: Fold subgroup shuffle intrinsics", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e59d6350d1f67a0bc43912296a12a3ae80cd96f3", + "description": "nir: Move constant folding of vote to opt_constant_folding", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"9492ab286402700175331feb4511e72c9f2c9b20", + "description": "nir/constant_folding: Use the standard variable naming convention", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9d2ccbfc15c98256e78e0b7bd51658e3fa38e012", + "description": "nir/constant_folding: Use a switch in try_fold_intrinsic", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d9c0f3627d78c33926c358f362818f9fe917c8a6", + "description": "nir/opt_intrinsics: Report progress for the gl_SampleMask optimization", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "d3ce8a7f6b93e893929b81f4d6605a2a968730a1" + }, + { + "sha": "b90063201a92d5ba3ec4b86ffe2f03aca0054a0c", + "description": "nir: use nir_alu_src_is_trivial_ssa() in nir_ssa_for_alu_src()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "233a820f2c3fb0a008a84a16a0920c3bbdbd61f5", + "description": "nir: skip bcsel with non-trivial swizzle in opt_simplify_bcsel_of_phi()", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "b031c643491a92a5574c7a4bd659df33f2d89bb6" + }, + { + "sha": "1df2fc9f9c8d720d05bf1da7b2baf659bfae98ed", + "description": "nir: add nir_alu_src_is_trivial_ssa()", + "nominated": false, + "nomination_type": null, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b9f95280119132548ffadd2e9c5e44c34a1056b8", + "description": "nir/lower_io: Add a new 62bit_generic address format", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b84f74f9b7fc4499c1cce489880bb342267192e9", + "description": "nir/lower_io: Support generic pointer access", + "nominated": false, + "nomination_type": 
null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a451f037ffc830e2d12274048363d3ee607655b9", + "description": "nir/lower_io: Add support for lowering deref_mode_is", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "57943112d9aca94aa1ec4ec3e2dbe1a21a643296", + "description": "nir/lower_io: Add support for 32/64bit_global for shared", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c50332fbc299983417e77763bd09c0c17def2f2c", + "description": "nir/lower_io: Add a mode parameter to addr_format_is_*", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7007d06898a222a1f39f1eb687c2cf4c98b5b78c", + "description": "nir/lower_io: Add a mode parameter to build_addr_iadd", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ff838abc463acaaddf92e3f7f9f97a5c41fe25fb", + "description": "nir/opt_deref: Add an optimization for deref_mode_is", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "df51518dc5b67bee7488a8b65c5f09653367fd63", + "description": "nir/opt_deref: Add a deref mode specialization optimization", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a8e53a772f617877f7735a2c7a4031701845b3a6", + "description": "spirv: Add generic pointer support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d6415b5d2b3fc295fb8b0ab0d8b8ce303ceb4153", + "description": "nir: Add support for generic pointers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": 
null, + "because_sha": null + }, + { + "sha": "9d377c01d0f506f9d0c436ae4f09faae96b979b4", + "description": "nir: Make nir_deref_instr::mode a bitfield", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7d5f3b5c0e6b43213a1a29d939afb328688befa3", + "description": "nir/split_*_vars: Prepare for generic pointers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d50a4dbc13ceedb65fd14b5976f8c73da2f693e3", + "description": "nir/find_array_copies: Prepare for generic pointers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ced9b6f0d8ef4211afd5ea4c09edabd2b3b80f27", + "description": "nir: Use nir_deref_mode_may_be in deref optimizations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "379d5354804e6cd443b821bd339260a2394dd536", + "description": "nir/vec3_to_vec4: Use nir_deref_must_be", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8a2cda1d53f52a1853c6f38eb3854be28cbf29ca", + "description": "nir/vars_to_ssa: Use nir_deref_must_be", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0f94ff8a6ac7a876cf0bdfaa2a618217ff79fce1", + "description": "nir: Only force loop unrolling if we know it's a in/out/temp", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fff78fc1c58aa1a63c72dc015e58345a9c9e1d66", + "description": "nir/phis_to_scalar,gcm: Use nir_deref_mode_may_be", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"9f3e3dfd2fb96faae0922d8bae03f011c08b1bb1", + "description": "nir/lower_io: Use nir_deref_mode_* helpers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9ae87a6c312502483edbf5469e7e15295b356d66", + "description": "nir/lower_array_deref_of_vec: Use nir_deref_mode_must_be", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3cc58e6470619ea708497050aa4692ad5f9fa3d6", + "description": "nir: Add and use some deref mode helpers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "74886cabaa3e07b25bcc8890b485f9b0faf1b3a7", + "description": "nir/opt_find_array_copies: Allow copies from mem_constant", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "89abfbeb7ac53e29573e4412241c0b6a5f20637d", + "description": "nir: Disallow writes to system values and mem_constant", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bb5d5029b7423d4ffe3b84149b2bc6a770647ef2", + "description": "nir: Use var->data.mode instead of deref->mode in a few cases", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5664713d7b99b99dcee1d009ef6e5b105a2d65ee", + "description": "nir: Handle incomplete derefs in split_struct_vars", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "f1cb3348f18a9b679925ee537091e52749e9f6da" + }, + { + "sha": "6b72004f12c8db56ae6838e0fdb74d416e918928", + "description": "nir/phis_to_scalar: Use a deny-list for load_deref modes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + 
{ + "sha": "3f0a29fffbce36dc4a68dd8335720b1ee0b9aa28", + "description": "nir/builder: Add a nir_ieq_imm helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9d3615166efa648499157c7747db96076081721b", + "description": "lavapipe: don't advertise linear filtering on integer textures.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3d81cf4621e08d206b204311069a295618ed57f1", + "description": "lavapipe: use clear_buffer callback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c3d8a69c3a6511d9c44d862193253c39964f213a", + "description": "llvmpipe: add clear_buffer callback. (v2)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1e3fbee4b0ad8424df75c5729125d8b5164638e5", + "description": "lavapipe: stop crashes with 3D z blits", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c871ac04a158401f36c0cc4f9b030509f3cab6d8", + "description": "lavapipe: fix 3d compressed texture copies.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "42b5cfdbd26cf1540f12d8193dd70d7bd01a9ccc", + "description": "gallivm/nir: fix vulkan vertex inputs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "621c4f816f1bfd51996f5958973b175f17a2198d", + "description": "gallivm/nir: handle dvec3/4 inputs properly.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a5aab63fb10607ea1f089e652d24f5acb7893a69", + "description": "lavapipe: fix 
dEQP-VK.info.device_properties", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6dabb8065abd8a5036ac84876f56a76d7b1eec32", + "description": "lavapipe: constify state pointers into command buffers.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "33a2f27ce96203bce6c10f59ac7c863379efa30d", + "description": "lavapipe: don't write to pending clear aspects in cmd buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ad3849259a88de04367857765f4b8d56d62dacbc", + "description": "gallivm: fix f16 quantize.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7ae81d65a495a6ba6da6dda6bcbd91a30cc2a8dc", + "description": "r600: amend space check for chips older than EVERGREEN", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "32529e60849dd20d167f14cb8542c5798343f0e0" + }, + { + "sha": "902b3182133061f1bebe55888c8884e8148878e9", + "description": "zink: break up dynamic access lowering", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e12e67476a53fb365f0751e1a4ab48f53e743c4d", + "description": "zink: add pass for lowering dynamic ubo/ssbo block indexing to constants", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "71a197943cf856b320082e4e969e0e57de7d9e66", + "description": "mesa/st: Fix a use-after-free of the NIR shader stage.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "57effa342b75a2ae681f2a7665925022dd6e4aa9" + }, + { + "sha": "6b85a887ff511c40ea6ea0f1aea311151ab9d01d", 
+ "description": "mesa/spirv: Lower variable initializers for global variables", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c2cb2dd3bc68d0d61fa483a3927563bab08c93e9", + "description": "zink: Added support for MacOS MoltenVK APIs.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a0f889bdadb18649742b9ac8553b3d08cd82e083", + "description": "zink: Basic framework to check for optional instance layers and instance extensions.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bfa9fd88fc7c942ca684602fb497fc3e50cbfc4f", + "description": "radv,radv/winsys: Move RADV_MAX_IBS_PER_SUBMIT", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6e5baf4afecd6486f3515992682de2e7701cbbe", + "description": "docs/features: add some extensions we missed", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a2056b025ce0da52b90e4520c64e85208a9b2e0c", + "description": "docs/features: VK_KHR_mir_surface is disabled, remove it", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3d58ab7576dfcc7aa84407c1af60f6acc21916fa", + "description": "docs/features: Minor update extensions support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "89c4bba8bce40edbb621af9ddc121ea498658338", + "description": "nir/algebraic: better propagate constants up fadd chains", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "16c756e55d553673314d22931bb0a84864380ff3", + 
"description": "spirv: reverse order in matrix multiplication", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "24a18b1a4b99c5dc97bfb2cf9a2b0c2211839b3a", + "description": "nir: scalarize fdot in reverse", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6dc6b8ad9f297f16294b277d97f0ee2cf89533cd", + "description": "panfrost: Set .array_size on Bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eaf0be88f6ce75f4f230d0e787aa0d4cb182502c", + "description": "pan/bi: Don't emit TEXS for array textures", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3a86e1e92d92f20775738dbbd0654907212d050b", + "description": "pan/bi: Handle 3D/array coordinates", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "56bda0fb4947356931463833c7864998a1e96452", + "description": "pan/bi: Track tex data register swizzles", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1463824319b428f9ec8f2f227036e7508bd2deea", + "description": "panfrost: Add bi_emit_array_index helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "914eebb0bcebdf8b87ae7081a339358eacaba611", + "description": "panfrost: Drop unused swizzles", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b6c71425c267d2576009bfb50f8590d272045829", + "description": "panfrost: Advertise Bifrost support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": 
null, + "because_sha": null + }, + { + "sha": "828c7e76267b6bccd284aa4de4d518d87393780f", + "description": "panfrost: Disable point sprites on Bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2ed003633cd5e546fd3299f72ccdc323b1549298", + "description": "pan/bi: Lower +CUBEFACE2", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "31864017510223c62c940129cdc0a1c352f38e32", + "description": "pan/bi: Suppress disassembly for internal shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f00f82469e0df49f08edf9ca1b4930bdf0cdf08", + "description": "pan/bi: Lower cube map coordinates", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f76558be656c7d69d73d9a74e94d88866d0d2b31", + "description": "pan/bi: Hook up cube instructions packing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0ed8eee706927a83794508986c6534d64cbd2379", + "description": "pan/bi: Split special class in two", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "20cc63815cd5ed355530125f42a26bc65bb2fd5b", + "description": "pan/bi: Move special instruction packing to a separate helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "07f1df51aa3524a10f2fd70ed80dc15516d0ff97", + "description": "pan/bi: s/t0/t1/ in bi_disasm_dest_add()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "47131919d86ef920ee00d410c7b779bf71b16aa1", + "description": 
"panfrost: Implement v7 texture payloads", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4e0826dcc85fddce05b134ba9ce2d0c8b45b6507", + "description": "panfrost: Add array size to XML", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "557633b142ea3d8ac4158302a87177118bd27a98", + "description": "panfrost: Suppress Bifrost prefetching", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c0b8f7394bc83e1dabad7affde7b105abb26e116", + "description": "panfrost: Leave push_constants pointer to NULL if there's no uniform", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "29fc115d5810f1b1b65686b543b687d6d08205e9", + "description": "mesa: do not throw _mesa_problem when invalid enum is used", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "e064d660205a04e7d9c63fdceebf3c293a6872ed" + }, + { + "sha": "e02e1ccbeedf0082e0440b557f21ea9bc4c46173", + "description": "mesa/st: call memobj_destroy only if there is memory imported", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "99cf9108340c2ccf7c1b05ec6f951e2e91b9c76c" + }, + { + "sha": "57c152af9ce2e34c1a8724574c6a2c5d19c0c845", + "description": "aco: select v_mul_{hi}_u32_u24 for 24-bit multiplications", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3a72021d7cf4486c52f60dd3a6ae776235d5b587", + "description": "aco: store NIR range analysis data to the isel context", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"81ff38947a4556d693abaf162394ed77b7608eed", + "description": "zink: clamped maxPerStageDescriptorUniformBuffers limits to INT_MAX when stored as uint32_t.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ac65d3b6b8522a25b91150a61cac1799c551c51b", + "description": "radv: fix shader caching with NaN fixup workaround", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "6f21995f986a8f119c916bef3293991e34774678" + }, + { + "sha": "36f62494ec7148931201726dcb5f79e5d7965ae6", + "description": "radv: fix shader caching with discard->demote workaround", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "bdd7587414441920743fe476270560722b6beb18" + }, + { + "sha": "19f3911cf822e9f399fbcc4d3d5f72b76760f93e", + "description": "radv: add some missing radv_{start,stop}_feedback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "200bcd7a44dc9a7c74da9299d72bc0ec828762f5", + "description": "android: freedreno: Add freedreno_dev_info.[ch] to Makefile.sources", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "4a0bdf47e43d23ba3026fd182c3dc9fc24ee0c2e" + }, + { + "sha": "09b2bd1df935a0dc71bc4da75076341e4dbd85d6", + "description": "broadcom/compiler: remove v3d_fs_key depth_enabled field.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "21ffacff8c70c35649679fd67f2ee770245751e4", + "description": "intel/compiler: remove branch weight heuristic", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "06764e0e5d5e37f9a3e00db7676b76d5472e305b", + "description": "intel/compiler: use C++ template instead of 
preprocessor", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "788f6dc85781c58b37c0d762085090d2d5d8d3d5", + "description": "Revert \"gallium/dri: fix dri2_from_planar for multiplanar images\"", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fe3e571870e00e954ba0738eaa7a276255a77e63", + "description": "tu: Support rasterizerDiscardEnable and RasterizationStreamSelect", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "841f736824369847db2d488308e5331dcbad3104", + "description": "tu: Support geometryStreams", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6b8d30ec1e3260c554200269e26a8c908730efee", + "description": "util/bitset: Add a range iterator helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "563789ce3723facfb991f7153ef4740ccc1ef097", + "description": "ir3: Support geometry streams", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "48cfaecd4f3bf0bc95a0cc91948f0141a82ec62e", + "description": "freedreno/a6xx: Update SO registers for streams", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "92022f2846e009527a898c8618864766e75d8e50", + "description": "zink: add VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA for WSI allocations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "98ebffc9f305d6964649b8d8ec41e47c5458caa9", + "description": "etnaviv: move etna_destroy_shader(..) 
to generic location", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2dc73d2c7f7c8c6a51d1631a38eb02f0b3f7ab7b", + "description": "etnaviv: move etna_dump_shader(..) to generic location", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3bef6dfe422ee61a1ca0a5f487856ca8d5bd83d8", + "description": "etnaviv: convert from tgsi semantic/index to varying-slot", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "edbdd977232f0cfe8f26a44ddbda792780168f4a", + "description": "nir: make tgsi_varying_semantic_to_slot(..) public", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4bd18e772a28e574562c40d81eafd76834faf185", + "description": "amd/llvm,aco: Replace VLA with alloca", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e3515590bfb3e25ad5d3c60d698a5cf19c8e3f43", + "description": "zink: require Vulkan timestamp queries for time query caps", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b07b943f30cdf25d0bddb422dcf00d0fa94d4a80", + "description": "tgsi: Initialize tgsi_declaration_dimension padding.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7278f900394e7d8124075557a8008f14bebff20e", + "description": "gallium: Remove duplicate resource variable.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "34cc6a804ec92b5e74cde5b346828874737276a8" + }, + { + "sha": "78a420ce46716bf788b4a6bfc85ae226e0c6c804", + "description": "nir/validate: Explain why we 
don't use nir_foreach_block", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e8a332b972ef10f42fd490365bdff15a127d504a", + "description": "zink: always reset query pools on next query begin", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "60db5af05a7e8230d54d07ecd6056ebb0a9be6c8", + "description": "zink: always use query->type for starting/stopping xfb queries", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e1e3484a10b0de17132d66b11a062bb142790c06", + "description": "zink: rework query overflow handling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7fc806bbbc929fb298a60a8e9a9968d7c8f49c66", + "description": "zink: fixup gs/xfb tracking for primitives generated queries", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "28ad3c1735a07e9c13f78a877d4888dfd8cab508", + "description": "zink: store batch id onto query object at time of start", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b073398be1d521bba685a888ece2130b121f3a95", + "description": "zink: also create an xfb query for every primitives generated query", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0b2475f5fd14cf7a291108368e33649c47a9eeec", + "description": "zink: more correctly handle PIPE_QUERY_PRIMITIVES_GENERATED queries", + "nominated": true, + "nomination_type": 1, + "resolution": 0, + "master_sha": null, + "because_sha": "e40a77ea5d0ba77f44e9266b43c069d921aec30d" + }, + { + "sha": "a8785579e00b4f94f814ccc03e039af0d17320a7", 
+ "description": "zink: deduplicate some query result code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b6a0309f482da368ea252808d2de44eef54556c8", + "description": "lavapipe: use resource get param.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2dcc9c7f54edbff075665ebe5d50f2499dc12163", + "description": "llvmpipe: add resource get param support.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ae17e1fdbcb2e85a5a605cf3a81e4b3b8b7bff87", + "description": "gallium: add a layer stride pipe resource parameter.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e9724722a3a7d57674474fb347aee292f1d6077c", + "description": "gallium: add a level parameter to resource parameter get", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0e3f2e632154d003cc582468dfcb758d9e8cf100", + "description": "zink: call the reset callback not only during a status check", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "95b9fc4146656493ad2544bd75ee14c01feb9163" + }, + { + "sha": "efadeb3269b54119cb185fd9126cfd0d7e9ede0c", + "description": "i965: remove prototypes of not-existing functions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e3f6a9ea36d9181b2b9fee41402be234f9ea88a4", + "description": "intel: remove dead code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b5e2c58ad865be4d88e4d29b5461015a82633e78", + "description": "anv: always annotate memory 
returned from anv_gem_mmap", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "4abf0837cdb14b10a58d28766d5c1d3698d8a6d8" + }, + { + "sha": "990343b70da7969e84730980c312404ab2509a22", + "description": "turnip: rework android gralloc path so it doesn't call tu_image_create", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b2a21febe0416d7d5506fdc9592b7b4c10431077", + "description": "os: Fix open result check.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "13fa0513569d0145487e12f3bc3aa10bc51088f7" + }, + { + "sha": "1eda842318704ed477c7669d2c8e94d6394792b9", + "description": "iris/bufmgr: Handle NULL bufmgr in iris_bufmgr_get_for_fd", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "7557f1605968c39d680545d5b8457d17eea3b922" + }, + { + "sha": "eb61f8959e0a2de35dcfa518ea9963424fe715c4", + "description": "i965/bufmgr: Handle NULL bufmgr in brw_bufmgr_get_for_fd", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "4094558e8643a266dfc8da9bc073751a3736a2fb" + }, + { + "sha": "aa2f6bd4f5688034bb716a50104c93c2881b29d9", + "description": "freedreno: Use freedreno_dev_info", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a1d2b215f10dc5fe9dfa2d027b6c07dd8d6849f7", + "description": "tu: Use freedreno_dev_info", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4a0bdf47e43d23ba3026fd182c3dc9fc24ee0c2e", + "description": "freedreno: Introduce common device info struct", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"ea81889ea436c2de7e3f3937f95f96e71d4c50ad", + "description": "nir/large_constants: only search for constant duplicates", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "b6d475356846f57a034e662ab9245d11ed0dd4a0" + }, + { + "sha": "ce5e2e21317d0bd29b4b46f8efd37985a62f5460", + "description": "nir: Stabilize compact_components sort", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aaa94d92d523b550bb8f20146505f0becb82bcc2", + "description": "docs/features: add Vulkan 1.2", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8c4d15d57eead51e3385af7cad8e66c368337ef4", + "description": "docs/features: update unpromoted Vulkan extensions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8077f3f4c4a3d8007caa30eed93fed1c6bbf3c5a", + "description": "nir/lower_returns: Append missing phis' sources after \"break\" insertion", + "nominated": true, + "nomination_type": 0, + "resolution": 0, + "master_sha": null, + "because_sha": null + }, + { + "sha": "95b9fc4146656493ad2544bd75ee14c01feb9163", + "description": "zink: implement pipe_device_reset_callback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7d6609e70d50795a582f549264e1a3359ed42ddc", + "description": "v3dv: fix occlusion query inheritance in secondary command buffers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "12f87b6e7c31a1afc48a0c3dcd18ccc112bdebb9", + "description": "v3dv: add support for timestamp queries", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"520f3e27b5b785c606f6fd465ada0334ca08efeb", + "description": "radeonsi: fix RADEON_FLUSH flags conflicts", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "ed3c5fe4694c1938b85b8139e53c642facd3a995" + }, + { + "sha": "18b7cafc700055869f1209b32a3ecb9dc9ee6752", + "description": "driconf: add disable_protected_content_check option", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9b0ffa9ecded84dedb2754f779c9be13e092033c", + "description": "egl/dri2: implement createImageFromDmaBufs3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d9582ff39e025a620518982bbde888970d67adae", + "description": "dri: introduce createImageFromDmaBufs3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2aff88a94572c63f978273b7872f634b8ae10521", + "description": "egl: handle EGL_PROTECTED_CONTENT_EXT for eglImage", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "52268a3833e9b63df26bf73f1507ea4fe50c96c5", + "description": "radeonsi: enable PIPE_CAP_DEVICE_PROTECTED_CONTENT", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bd182777c8f2c4468568901ce92b1796674eb87d", + "description": "egl: implement EGL_EXT_protected_surface support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9698a222a6c26e998d9efe6d940e11772aa8610c", + "description": "radeonsi: honor PIPE_BIND_PROTECTED", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a65350454993b29c53a00592a6d33ee5a04b28bb", + 
"description": "gallium: introduce PIPE_BIND_PROTECTED", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7748e501382bb7c8e845ac67f4631c3fd5e354df", + "description": "gallium: add new cap PIPE_CAP_DEVICE_PROTECTED_CONTENT", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b54c054a4182b30e7d8a13ee8a80eb57920e20a6", + "description": "v3dv: expose VK_KHR_maintenance1", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "53c6dc504b03a8bab80538d008861a7f14e3592e", + "description": "v3dv: implement vkTrimCommandPool", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0dedee7636e8fa64734dfb1c8f740b3665b52607", + "description": "v3dv: update assertion to match VK_KHR_maintenance1 semantics", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e4988e6affe5f04013043d75c21bae1d4aa84956", + "description": "v3dv: fix base slice selection for copies involving 3D images", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0a9b8077adbe181e1dab20185bbde444c0468cac", + "description": "v3dv: add image view debug checks for VK_KHR_maintenance1", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "03f260cb272838b7bd2e65cff6bb50840db56679", + "description": "radv,aco: optimize computing the sample mask for per-sample shading", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c63bcda22c8deaed063e0a950839012b97b041b2", + "description": "radv,aco: adjust the 
sample mask only if per-sample shading is enabled", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ea83fd912423ac0247395f5c1ccabe94cd95ee24", + "description": "glsl: drop NMS OpenGL workarounds", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d5dd779c4905a93b623915e224b2eefa40f073f9", + "description": "panfrost: Add missing alpha-first special formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3a67806edcdfe5d0f89542eb6c2b5ba95791d144", + "description": "panfrost: Fix BGR233 component order", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b5bc09358416a07190e2535eea548141efaebf46", + "description": "panfrost: Fix RGB5A1 formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f2af23bd1208d1ccf6bee44c3aa37d0a4a544c9d", + "description": "panfrost: Use macro for panfrost_get_default_swizzle", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c5d86198a4e8e7b35812a975fa30edc528de4ff0", + "description": "panfrost: Add missing 1/2/4/64-bit formats to XML", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d473622fdeeec6c79266b48ea3e6000b9b0e4e71", + "description": "panfrost: Rename VARYING_POS to SNAP4", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1b5cac4511f5e61cfa85f9b82291006b618c3250", + "description": "panfrost: Rename VARYING_DISCARD to CONSTANT", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c6bdd976e6110fbf4ec1add9c11d0382e38236d3", + "description": "panfrost: Split out v6/v7 format tables", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "29bb2812c141ca6dcb4d568a3b11bd49e6cf19eb", + "description": "panfrost: Add v7-specific depth formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8ca1478b2b31f67c358e07e707f68611fb0d8b76", + "description": "panfrost: Add miscellaneous missing Midgard formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a21d3debc3f16e93c2417849379f9d9f4247b571", + "description": "panfrost: Add missing depth/stencil formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6fb9ef919f3b4583d4662c59bfa93c60025bc93", + "description": "panfrost: Add v7 special colour formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cae1be954cadbe0535a3147009072c46662ab198", + "description": "panfrost: Remove panfrost_is_z24s8_variant", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "870283bcc4ac2e2275d7ff6ddf455164f4f264a7", + "description": "panfrost: Remove duplicated format arg for ASTC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "44155fa661c300a69dc1edc74edc7c15ef33f743", + "description": "panfrost: Complete format_to_bifrost_blend", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"e1624b67dc1872fbf7cef09b31279b61bbb7b3a1", + "description": "panfrost: Use panfrost_blendable_formats for blending", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "41f06ac8776eab138d50548e0167ee1e5df688a4", + "description": "panfrost: Use panfrost_blendable_formats for SFBD", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "293198ea04ef4bc286f7e425e332360b3230424e", + "description": "panfrost: Use panfrost_blendable_formats for MFBD", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d9b16ca4c91242cc9846e6590c7acfa997a010df", + "description": "panfrost: Add a blendable format table", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "666677d0ef5cfbb585270f21da6818e893c26a4a", + "description": "panfrost: Use consistent swizzle names in XML", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1e18d9952bb4c48f8c2bc3c8df3ebb7bc1024cd8", + "description": "panfrost: Add MALI_EXTRACT_INDEX helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1b48b9e61902a95d4e2d80009c33bc88f44b5d1b", + "description": "panfrost: Don't double-compose swizzles", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "b9a136cdd1edaa2e97e7f9745626ff857978022a" + }, + { + "sha": "a575bb136973901bdbe7478c77aa5a0ad5d7464d", + "description": "draw: Remove draw_install_aaline_stage dead code.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0a4c1db926cc359254ed4a35cbc77f18c53bdd99", 
+ "description": "gallium/u_threaded: merge consecutive draw calls within batches", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d0916ccb10994e10382a411e5836ada56254e8e6", + "description": "gallium/u_threaded: move a structure up to be used later", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a44868beda5569c674e1cc01f28e97f0d639df7a", + "description": "radeonsi: implement multi_draw for compute-based primitive culling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cc24ec8c0772015d8ef0d10e646e89e9014e9e1d", + "description": "radeonsi: set NOT_EOP for back-to-back draws on gfx10+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ca40dc01cc8c9b7d5cfda6acf35b1b5e561413d3", + "description": "radeonsi: add support for multi draws", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0ce68852c1a8d37e837d64f97a2a2ef5d6b669a5", + "description": "radeonsi: implement multi_draw but supporting only 1 draw", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ae8d89260c6c40864d1a50af51801b7eae6c3448", + "description": "radeonsi: don't check info->count == 0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d9c4ca2b7bc399e7376c47888f46ef5c724a1e18", + "description": "radeonsi don't get count from pipe_draw_info in si_num_prims_for_vertices", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"7cc939f7dde4313233dcef7206cbf9440a63c462", + "description": "radeonsi: add num_draws parameter into si_need_gfx_cs_space", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "095ee8f867bb6ac2dbf143579a694fcb9f9e6acd", + "description": "winsys/amdgpu: remove incorrect assertion check against max_check_space_size", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "114a899cc89faea1946ccc3aafbadc25a070f08e" + }, + { + "sha": "73f2d3c291f83fba2c94ab71ae4288ba71955814", + "description": "gallium: add pipe_context::multi_draw", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d9c12c60723b0b2a8f18928beef4284e5136f09d", + "description": "gallium: move pipe_draw_info::start/count to the beginning and pad empty space", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8943c80c9b171c638b98c35a28840f13b66c7860", + "description": "radv: Fix variable name collision.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "eb104e949ee4b7e0813d14f11a4a952bae48fe80" + }, + { + "sha": "41be85ad7bc2bbb8fd55cc4b417527f6fd7ec0d7", + "description": "ci/x86: speed up piglit testing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e54c7f4b1ac6b82769e765da7404f3341853f185", + "description": "android: aco: add aco_form_hard_clauses.cpp to Makefile.sources", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "3dfbed2a87cdac7003b7db533046b633579e8d2f" + }, + { + "sha": "a1d6c03e2f343b0c959e1b96dc8a77c2439a1b40", + "description": "etnaviv: don't import allocated scanout resources via from_handle", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3862cec314de4a82821d9694ae2eec6f629ec782", + "description": "etnaviv: pass correct layout to etna_resource_alloc for scanout resources", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ec21148311fd50f7171c50e921536f5c3e4360b3", + "description": "etnaviv: simplify etna_screen_bo_from_handle", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "94ec412b268967ead688a281d6f270dac8841e5d", + "description": "etnaviv: do proper cpu prep/fini when clearing allocated buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "181790117bdfa98021890e9e9fa2f85a1b1e5a60", + "description": "etnaviv: cosmetic etna_resource_alloc fixes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "98db7c4841c652ab8bccd4dc6c6a4be084e5cddb", + "description": "etnaviv: free tgsi tokens when shader state is deleted", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3fd512440bf094505c02c457f195914239b44475", + "description": "etnaviv: tex_desc: fix TS compression enable", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cf871e92327b18de4ee703dbc89def73844db949", + "description": "etnaviv: update headers from rnndb", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3ba753d9f58aee0bd1e72e835953560fb1fbbe46", + "description": "etnaviv: blt: properly program surface TS offset for clears", + "nominated": true, + "nomination_type": 0, + 
"resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "866bb22d6b91d7a12e62803bacf7d8bc912d3216", + "description": "etnaviv: drm: fix BO refcount race", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8b0218beb5995a6d516da71ea22cbe763ff0c454", + "description": "etnaviv: drop etna_pipe_wait(..)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b7501184b90a20015885b3f2276a7a5ceaef31a7", + "description": "radeonsi: implement inlinable uniforms", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6d058ac6c997efe05bca13ba3f381b4d55a13e40", + "description": "aco: Fix accidental copies, attempt two", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b03c86a71f365e9f0440850761462ddbc442f816", + "description": "intel/dev: Bump Max EU per subslice/dualsubslice", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5957b0c162290e444d9e57dbed07047c421e7148", + "description": "glthread: pin driver threads to the same L3 as the main thread regularly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d8ea50996580a34b17059ec5456c75bb0d1f8750", + "description": "util: completely rewrite and do AMD Zen L3 cache pinning correctly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4f2c2307f9e82498b2374e95aa8a17d0eb80531c", + "description": "util: add util_get_current_cpu using sched_getcpu and Windows equivalent", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "9758b1d416a109f92e911d7bac6f00f9419affab", + "description": "util: add util_set_thread_affinity helpers including Windows support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3433d193e7c1ed0ddf24deffd1ed74b5cceddf4b", + "description": "st/mesa: remove random L3 pinning heuristic for glthread", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "96d9f7761d4b313c69664c01682aef8f13bc6c02", + "description": "util: consolidate thread_get_time functions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "53a15925da524d871b1331812cd9e91143fadc52", + "description": "util: remove unused util_get_L3_for_pinned_thread", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cc16113202769a0016d3f82351e441dd03261757", + "description": "android: fix libsync dependencies (v2)", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "27b888794624225343287e57f5fcea63f94850b3" + }, + { + "sha": "a09717c4de08b647657073e806bd1d5964212690", + "description": "glsl: add extra pp tokens workaround and enable for CoR", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "43047384c331a9240339563704fe5d97ebe599d7" + }, + { + "sha": "ce0b72a13a2890102e1f324c54735079ca3e30f5", + "description": "intel/fs: Don't emit_uniformize when getting a constant SSBO index", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0fe5490724e3803dd397d9b8a2d29fdf7d5da886", + "description": "v3d/format: use XYZ1 swizzle for three-component formats", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f7d1460418f11c0209d099ddc552d126a68ba6e4", + "description": "gallivm: zero init the temporary register storage.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "44a6b0107b37ad9644d3435cf6d2d29b6779654f" + }, + { + "sha": "176137948150d153c7756505fc78dcfb13511f83", + "description": "aco: handle SDWA in the optimizer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ecc5b59a7069ab080a892e3f6a413ef62d3afee2", + "description": "aco: don't allow destination opsel for v_cvt_pknorm", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bb890f2e7c1bc941cd72483686723fe0fbe51625", + "description": "aco: fix combine_inverse_comparison()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7e4aa8c8e9be44c77872beecced2f8dc9c85aa2b", + "description": "aco: fix printing of some sdwa sels", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "70320f41172dc0a826cef614e3c8b379fdc4444c", + "description": "aco: assert a label only uses one of the members in ssa_info's union", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3dfbed2a87cdac7003b7db533046b633579e8d2f", + "description": "aco: create s_clause on GFX10+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f4c090a3b3ce7180169a85caa6569b052d1f8415", + "description": "aco: refactor split_store_data() to always split into evenly sized elements", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "daec83c7d61a6ee9a2fe5b0116fc38b9672b8bc6", + "description": "intel/genxml: don't generate identical code for different branches", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e96f33cd30087016162b99ee0e1e127e7d3abd63", + "description": "intel/tools: fix invalid type in argument to printf", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "70308a5a8a801a960bb9f45fb597f80b77c51014" + }, + { + "sha": "13859c769f0e00291bda9bd60f0600c45ac17a03", + "description": "gallium/dri: fix dri2_from_planar for multiplanar images", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6c118aebb116c6e6d019f150d9b738d9b4054f15", + "description": "gallium/dri: fix dri2_query_image for multiplanar images", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0f3594cd7b0871c030aff418b4a1611119f789a1", + "description": "gallium/dri: allow create image for formats that only support SV or RT binding", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "25f984812bef5426440af37f0b6447c8053604d5", + "description": "nv50/ir/nir: don't use designated initializers", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "8850a63161ab58d89c534a1069cda845bf69ecc8" + }, + { + "sha": "e07c5467633431377ce4c9890f96d58cc7a77fdc", + "description": "v3dv/format: use XYZ1 swizzle for three-component formats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d8562b742ead42c9ea7a04007598efcb58a636db", + "description": "virgl: 
Correctly align size of blobs", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "cd31f46f08a7a60a7f2ca074b817f5fd2034746a" + }, + { + "sha": "7dc17ae5abc4ef85012560e8146def6bdb2599a7", + "description": "glsl: Update loop_terminator constructor to accept parameters.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f74ef158798726a96f542f5f86eb027782c9cf1f", + "description": "aco/ngg: Incorporate GS invocations into workgroup size calculation.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "09b9e52c0d8945afbfa79e5fe8f6db75a02e6980", + "description": "aco/ngg: Export a zero-area triangle when primitive count is 0.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "73449f9a6237fb54d339db08a9912c26cdbb8e6a", + "description": "aco: Add a few assertions about LDS usage.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b6654adc0ea68a76b70d5cf35e31b61651b2f26d", + "description": "aco: Make emitting reduction instructions a bit more convenient.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8d6246205ad3d71efd644e1d1b60827aa6e54298", + "description": "aco: Add some validation for PSEUDO_REDUCTION instructions.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "260f9c503ad3c2459fffbc4dc4583554b2437e70", + "description": "aco/ngg: Put shader query reduction operand into a VGPR.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"9757c3cb6b810431f3ea1fdeca97da4aeeef384d", + "description": "aco: Assert that workgroup barriers are not used inappropriately.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "915f2919f64f40f2cd51ae10dc85c685edda97ef", + "description": "ci/bare-metal: suppress 'No such file or directory'", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ecdcf22d5d848edb582ac1c49c6c6de74309a476", + "description": "aco: switch aco_print_asm to a FILE *", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a293fad4ef7f4d941dab480bc84e93e9544a5586", + "description": "aco: refactor repeated instruction disassembly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ed2449d55bef134ed05838926cd0cdca90d3e4eb", + "description": "aco: move individual instruction disassembly to its own helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7ded846ba920c9dfc0d7e24084f2bf48b9169f7a", + "description": "docs/features: Update extensions for swr", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2088f359ff805af1b62becaa6c7f94053213f599", + "description": "docs/features: Update extensions for softpipe", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "483657de3232df26e3049e12e550e4e6db0b4c12", + "description": "aco: use mubuf helper in select_gs_copy_shader", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ec7ecfe9cbf38c12e64a84dae5fe99b04bb6b073", + 
"description": "aco: use control flow creation helpers in select_gs_copy_shader", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "57d977a23fbcf513ebc167def0acc2cbb58bfe99", + "description": "aco: round bytes_written to dwords if larger than 4 bytes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "41839d38cf367baea66dfe2306cc4dd0b4adaf2f", + "description": "aco: default to a definition size of 32", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "768186e2af3bf90d67db65979d240850419d1d38", + "description": "docs: s3tc -> S3TC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d30470e999b3f7d9135e95e33dc48e2aaf370612", + "description": "docs: clang -> Clang", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f3095f9b40c04f13c882ce4d0cc493246adef7b2", + "description": "docs: gcc -> GCC", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4aded097346086f63c6582c341a7195716faa535", + "description": "docs: fbo -> FBO", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e8d0313df54006ac509ca36e8a83f1dafff0189d", + "description": "docs: hud -> HUD", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f408343198acd2ba908e550707045c299566a3be", + "description": "docs: api -> API", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"a22cedf4f0fcd79c265a546eafa0018c3f1d9bb1", + "description": "docs: anistropy -> anisotropy", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "689145eeb2bb668004cd21419678ccecf11ef009", + "description": "docs: eg. -> e.g.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "580b9d11ff25a7887c5d69c91b4428fd6a943ca0", + "description": "docs: ie. -> i.e.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fef8a4befdc7f980e4da875cf80bc892cbaf7ed4", + "description": "radv: remove call to nir_lower_pack()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "212be2a04e4669cabd538d23327124fc6b31d6cb", + "description": "radv: lower pack_[64/32]_* via nir_lower_alu_to_scalar()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bd0468ed3384d17c0a74e79852cd8c462c5985b5", + "description": "nir: add options to lower nir_op_pack_[64/32]_* via nir_lower_alu_to_scalar()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "121fa017e1da5dba0f70ffdc39ddb2854ecc6193", + "description": "ac/nir: implement nir_op_[un]pack_64_4x16", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "543f50789a482f32107b1859d0413c817b17bf02", + "description": "aco: implement nir_op_unpack_[64/32]_*", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "79cd22b889b401665cb92a6a4dcafa30c26dc298", + "description": "v3dv: enable alphaToOne feature", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eb104e949ee4b7e0813d14f11a4a952bae48fe80", + "description": "radv: Do not access set layout during vkCmdBindDescriptorSets.", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "29999e6b9d737cdab9510246bc5f780791f7c9dc", + "description": "radv: Fix 1D compressed mipmaps on GFX9.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "1fb3e1fb70844d1bd84bc61de6022b85e4d45374" + }, + { + "sha": "06cf838cbdcb0318441093370dd518839a8c1737", + "description": "intel/mi_builder: Support gen11 command-streamer based register offsets", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fdb1997ab5ea2533af68471cd8e9e357e3f28afa", + "description": "Fix VMware capitalization.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0e7d45c89a578e73f44fe77d1098f75f6045c853", + "description": "util: use dllexport for mingw too", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e030ab51632d1de1859e4683ae0ff260a8d98a14", + "description": "lavapipe: configure suffix in icd json", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3017d884bc0d3b785572df9477a6eb774defb3ef", + "description": "gallium: use libpipe_loader_links", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0355d10c6bd9158cae26696803c2f26112e8a436", + "description": "wsi: move drm code to wsi_common_drm.c", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "72ce22f99180bf55d83091e83980e8ecac3aaa70", + "description": "lavapipe: fix usleep usage in lvp_device", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d399c3e861a7c459501413e8d675db8ca1a40712", + "description": "intel/dev: Add device info for ADL-S", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "26e53e3afa7ee971fc0e686d2d00c5941264be1f", + "description": "aco: ignore the ACO-inserted continue in create_continue_phis()", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "638cbc21a1c01c87f620edc820e913e48aba2287" + }, + { + "sha": "fa5acbbcdea80ace5232648a7b885d06bfd2872a", + "description": "CI: remove llvmpipe cl flake test", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8d03cfae7c30b06da2353956dc59b97e5b82bf11", + "description": "anv: Drop warning about gen12 not being supported", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a1885332d768f4787a683f1db1baac8992ed5349", + "description": "panfrost: AFBC compress Z16 depth buffers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "388c99b0298737c8aa2be2bd5d37d3fa9e718383", + "description": "panfrost: Z16 depth buffer support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4a20ed6b45af1141402ff58feaba1fc9c93c9b7a", + "description": "panfrost: Move zs format handling code out of the !afbc case", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"06d1f7c64b29ece88c078c0b23d8b4ce70b47903", + "description": "docs: Specify when branch points happen", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "328a3503875c30123108bedd949e34c06831584e", + "description": "vulkan/util,vulkan/wsi,radv: Add typed outarray API", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "437995bb7055d2e75c93e58a4f20af1040d74f8f", + "description": "aco: remove all-undef phi opt", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "70ff262cda8a8e3566f73afec669386ddb4fa70c", + "description": "aco: use v_mov_b32_sdwa for some 16-bit constants", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b882598ee1723358e1e19449cbb2619da7abc246", + "description": "aco: remove some unused optimizations", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d20a752c0de426e4c7c64a4d42d10f373f73c97a", + "description": "aco: use Builder::copy more", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e54c111c4596274e21b9368e6fea503107e9e441", + "description": "aco: always use p_parallelcopy for pre-RA copies", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6db5fbf9f274cf433525628ed7a979f7a05cbd96", + "description": "aco: allow literals on sub-dword p_parallelcopy", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "74e2e9b682afe9f0e49e28facb2ecc387a8b4a74", + "description": "aco: don't use bld.copy() in handle_operands()", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a834d9ef86a34dde64a379cae35e987466308918", + "description": "aco: expand vectors passed as copy operands", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e092f34dfa309929419d269af4aa17d8261e2cbb", + "description": "aco: copy-propgate through p_create_vector during value numbering", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0f31fa1b645e881e5e293193c85d66f5e7e48c9a", + "description": "aco: skip value numbering of copies", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "72b307a3389b8940ec639c3086fcef79f778b7e5", + "description": "aco: don't do divergent break+discard", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d4503a902057cb5ddb0b2099e3d6df5ef2ba656a", + "description": "aco: update phi_map in add_subdword_operand()", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "56345b8c610e06b2c6ccb0d0975e62f9a008e34e" + }, + { + "sha": "23fb54bf7facf9368a3d8c633d3e4915c426d7ab", + "description": "aco: Clean up some C++ usages", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "79347f5cd4561e5b2b04064b78f2f1224da7e27b", + "description": "radv: enable VK_AMD_mixed_attachment_samples on GFX6-GFX7", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ee1d30ab8a9e53bca2a146e175a8aa6a28ec02bd", + "description": "radv: flush CB before and after FMASK_DECOMPRESS or DCC_DECOMPRESS", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3fee91d2e7efb9ec21965a734b4e338bd7965fc9", + "description": "docs: add link to extension spec", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "2ff97847d198e27647460af199a44e29fe433b86" + }, + { + "sha": "76f74bd653e35dc332744aa4b17e668f7c21c046", + "description": "CI: Only run OpenCL tests when we need to", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "48e83f76653dadbd332a01ac2ce74f67d90a9bb8", + "description": "radv: do not perform a FMASK expand for non-writeable MSAA images", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cb12879401b88dd0712771cac137ed04886b2836", + "description": "aco: fix GFX8 16-bit packing", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "d96f387e7ac448b450091b900ab089eea3eb7b86" + }, + { + "sha": "666817ce846948afb6a01b29aea45d6c58477045", + "description": "v3dv: grow meta descriptor pool dynamically", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6b874eb42c80acefa4d94be2490dcce47e19a06a", + "description": "ci: Run git_archive job if all_paths matches", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e5cd5e9cec9112a71007b88220178afadb6c0133", + "description": "pan/mdg: fix LOCAL_STORAGE wls_instances packing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e4b170a72062e0648ca87a847975717600c20e6b", + "description": "v3dv: enable the logicOp feature", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "af0435cbfe61632407ce135fbea9cab6fe1e4fb6", + "description": "Revert \"radeonsi: use staging buffer uploads for most VRAM buffers\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "fd6bbdcf59dc5b87fed31f8fc51a2b27eaedfbb2" + }, + { + "sha": "e29fb8e80f1d4f0f105eb6b5a27bdacd03fb9136", + "description": "amd/addrlib: Initialize Gfx10Lib members in constructor.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "006ce7358c113b7c269e2cb3c9e483551e7fcea5", + "description": "freedreno/gmem: Respect max-height limits too", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9f5c8ff0ae893d6b936741977696aa81a106bf57", + "description": "freedreno: Rework GMEM limit init", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6c6693e043adc90ce52fcd28d1b0ed8d6b2ade05", + "description": "pan/bi: Fix ms_idx type to catch missing ms_index source", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "b83c293674c11de4acec14a45f798cb12566a8e0" + }, + { + "sha": "eaed477652a07d608ea3c85f1b73f8d9c5f16a6e", + "description": "pan/bi: Add ult support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f77bbc92364e9bcf82f892fdacbc090822b358d8", + "description": "pan/bi: Lower {i,u}{min,max} instructions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1c8b8e3c82e2aa7bbbc9aae3f7b4982eee869ab6", + "description": "pan/bi: Add support for load_point_coord", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "55c0dd943e9ab3eb117509394f958f716e95532e", + "description": "pan/bi: Add support for load_front_face", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0a582b53f22c6b7fbfab2dafd257f5bb5c39f8e9", + "description": "panfrost: Preload primitive flags when gl_FrontFacing is accessed", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b92eadb29cc8ef09096d9196434d49e35a3eccaf", + "description": "ci: Add \"check mr\" job to needs: of build jobs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "27ce5d921ef00c46d531df1b60f9e7d6d588b0e0", + "description": "aco: remove isel_context::allocated", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6f68cacf619f7f13bc870355709224c1f3e9bbd8", + "description": "virgl: Always enable emulated BGRA and swizzling unless specifically told not to", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3d9ffdcc729b77ecf26aa14436105782763be274", + "description": "nir/lower_memcpy: Don't mask the store", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "a3177cca9961452b436b12fd0790c6ffaa8f0eee" + }, + { + "sha": "67238f95b52024b4e4b2acd0f750c15317ca3778", + "description": "freedreno: Disallow tiled if SHARED and not QCOM_COMPRESSED", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "63bdbfd045de092b0cc59447cd7cfc3c45daa634", + "description": "freedreno: Update import/export traces", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5f686c308ab0589f38c32e8fd500baa11cc37155", + "description": "st/va: fix build with old libva", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "0eccd15852b9f1933166447cee8a8409d90bfe79" + }, + { + "sha": "638ebdea78e67d19dda1ac6d2ea5a7c89e5b8d05", + "description": "glx: get rid of memory leak", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "e19644967f20a6baf82c5ffd6973e2b25fac92b3" + }, + { + "sha": "26f58e87a0d8c91d32d9cc680fbb0158891f2c6b", + "description": "mapi: do not return thread-specific data for wrong thread", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "61d40ae4d04a235d13c9602d36574f2186f91f4d", + "description": "mapi: do not call thread-unsafe dispatch getter", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "65d6f258c5242ac26e17fc803818d76e62c40b83", + "description": "mapi: remove unused function", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cf083f1d02b8d46a310f4ade96992a9b2f28965f", + "description": "aco: use do_pack() for self-intersecting operations.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d96f387e7ac448b450091b900ab089eea3eb7b86", + "description": "aco: improve code sequences for 16bit packing", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "40bfb088285a4ffecedbf22742c241900e477d73", + "description": "aco: refactor GFX6_7 subdword copy lowering", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, 
+ "because_sha": null + }, + { + "sha": "06b41ca589913c7b64b70909a38164e4e269c749", + "description": "iris: add support for fence signal capability", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aa1d298b339fae79341063072d4f8ff2dceea0ff", + "description": "iris: handle PIPE_FD_TYPE_SYNCOBJ type", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bddaa9339c789c24d459e781707d89fb614063b0", + "description": "ac/nir: remove bindless image atomic format check", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2ff97847d198e27647460af199a44e29fe433b86", + "description": "docs: document zink's gl > 3.0 requirements", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "be693beef1e71205986cbf2ace1f0be7a79e3229", + "description": "docs: do not document required minimum", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dd0f941097419ef27739cf308af3473dd3895bfa", + "description": "zink: verify geometry shader feature", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "8028991f361f22f20abdbed0c2f61f7c5c23c052" + }, + { + "sha": "82512a163bac905a526449db88705d6534792d83", + "description": "gallivm: lower flrp for all sizes.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b8a9bd9b93680dbdd0b52f042a85aadca8acfd22", + "description": "gallivm: get correct min/max behaviour for kernels.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"9845c1636ccab18d9294bb36bc56c17c6a02f706", + "description": "gallivm: add support for 8/16-bit mul_hi", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4b6d3328432410164d907fcc9b674239963766fe", + "description": "gallivm: handle sub-32 bit masked stores.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "81d8ca70eb050323248c78186a049c72b6e14509", + "description": "gallivm: add b2i8/b216 support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0f78ca9d54647c3906cfaf0fda43a133af139f0d", + "description": "gallivm: add 16-bit split/merge support.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4ecdc5ec4e283b2f85562b7f9200eda7482b4a1c", + "description": "gallivm: fix 64->16 f2f16", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "842a53913c0d5d11f1062ffcf4fb8dbeec33102a", + "description": "llvmpipe: fix 8/16 bit global stores", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d1ea49d9244126fa4ec6acdf4ae0792fa788cfd1", + "description": "anv: report latest extension spec versions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "303a74c52c0692b4aa86a06958861f980d728f9f", + "description": "android: panfrost: Move nir_undef_to_zero to util", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "86b2b4eb768bcee6664790a66fc5508002e34424" + }, + { + "sha": "6a189c89f8b748845b9e33127d44a1dd532c32c8", + "description": "util/xmlconfig: Disable for Windows 
like Android", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fb56fb02a1e9a94144bc2d5fb148e343c19a930b", + "description": "gallivm: add load/store scratch support.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9c1f6ed80441bcd1cb4fec7a12201240e9249ea9", + "description": "frontends/va: Initialize drm modifier on import.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bef6007c3aa40eee069250bd796e72f6d8a82341", + "description": "gallium: update abs_delta segementation parameter", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9c67f3d723abbd7ccffda116d7e2c82a5a116567", + "description": "frontends/omx/enc: fix omx h264 encoding force-keyframe-period issue.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "390887ff586221cf36b001390b9542efb4e3205b", + "description": "mesa: Remove the key parameter from the _mesa_HashDeleteAll callback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cc6dcc6ab4f025a02f57e70b2db112fe99373387", + "description": "mesa: Remove the key parameter from the _mesa_HashWalk callback", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ca752b08c90f22deb3854dffce46f535c8e430f7", + "description": "i965: Get the gl_perf_query_object Id from the object", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f71488d1d106cad5f888b28fba938a842a512298", + "description": "mesa: Store the atlas Id in the 
gl_bitmap_atlas structure", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0e17bb509e6d3bd767ebb29894467e3394f8ad38", + "description": "mesa: Open-code hash walk in _mesa_HashPrint", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3c2489d2e45b3013361c7284ed9de14fe40554cc", + "description": "amd: print NUM_PKRS with AMD_DEBUG=info on gfx10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "989cc76f0c90190b1894bd7c80b80ccf96ae2ebd", + "description": "amd: replace 0x028848 with the register definition", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e72b76b94bdfff49bbd4aefb5b489599a39ca2fb", + "description": "amd: update gfx10-rsrc.json for gfx10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e477255271a2a21e5f68117ef9b59ea0a9c1332d", + "description": "amd: correct typos in gfx10-rsrc.json", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "19e32793b619cd6686560c39dfa2b2437989a82f", + "description": "amd: regenerate gfx103.json from kernel headers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9d3b802c6a999332181078575e0581f6739dcc25", + "description": "scons/windows: Support build with LLVM 11.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f21dd3bc5a06e700988926522f81727efde7123c", + "description": "scons: gallium/auxiliary: Unconditionally compile NIR regardless of LLVM", + "nominated": false, + 
"nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "fa483d8cd1bc975c339082435a2e0f657959ea39" + }, + { + "sha": "3ba786f6243ec4e4dcca9a568c4231f492209f1a", + "description": "spirv: Fix OpCopyMemorySized", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "8323c03bbfd4501ab54fbd1a8373b8c54017806d" + }, + { + "sha": "a071590b1d13457640f2b02f067854d26d666675", + "description": "frontends/omx/h265: Check the pps set before the scaling data", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "08762d5171bfcc94a36d778679abbf05d5045a51", + "description": "frontends/omx/dec: Use the known codec profile when allocating buffers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "56f90a6ac1946adbac3cd43102479158efd58bb1", + "description": "pan/bi: Account for bool32 ld_ubo reads", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3cdca1514f601fbd5226ed7026ef35cd6dd2a2f1", + "description": "panfrost: Don't advertise MSAA on Bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f0e65805cb83cdf7ceb7fce9c02f537c39558d28", + "description": "panfrost: Drop PIPE_CAP_GLSL_FEATURE_LEVEL for Bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b9a136cdd1edaa2e97e7f9745626ff857978022a", + "description": "panfrost: Implement BGRA textures", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "048e4315426f3f3f11691f722666ed15ba688cce", + "description": "panfrost: Fix component order XML", + "nominated": false, + 
"nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "8389976b7c09d6bb7ffb9964ec8748ecc3a9862a" + }, + { + "sha": "25b66e61f736f638eeca950ac5e3eb406f12e249", + "description": "panfrost: Calculate thread count on Bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fb32406e566f262f572d201ce4ad51f1647be583", + "description": "panfrost: Don't export queries", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f4ecc432bf7603b86935580785a3015fc92d1b15", + "description": "panfrost: Record architecture major version", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "81b28ebcb5a3801216e752cfda2e600bbc7bcb10", + "description": "pan/bi: Use nir_undef_to_zero", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "86b2b4eb768bcee6664790a66fc5508002e34424", + "description": "panfrost: Move nir_undef_to_zero to common util/", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f0421099efcece38d866fc749663c61fa880f2cb", + "description": "pan/bi: Pipe through tls_size", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b43b1535e25b3e7ec59065235ccc466c7de42dd3", + "description": "pan/bi: Implement spilling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e33b2976f3adcb8c7185265efe2812d91b33ab64", + "description": "pan/bi: Pack LOAD/STORE", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"74be83d876f8e0b7e566a02920657ae21f205814", + "description": "pan/bi: Add bi_foreach_clause_in_block_safe helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2ff54cacf0e23515a3b9adfcf01dc21b1bf7e1fb", + "description": "pan/bi: Factor out singleton construction from scheduler", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2d0f46a181b4fdd6411e53992d073e1c10f94179", + "description": "pan/bi: Implement bi_spill_register", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "82dbc4ea782e68fcb9bd3400816911ca948de01d", + "description": "pan/bi: Add helpers for working with singletons", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1db83fc75d95a8f5c787e9bd3b75ba235831dd46", + "description": "pan/bi: Add bi_rewrite_index_src_single helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9e915fd5f743a490289d6c8f3a9c9700e02b3475", + "description": "pan/bi: Add bi_fill", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "486a820bc675a84546cbc88e24637d9a9edc789b", + "description": "pan/bi: Add bi_spill helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "171bf1991719f34807274c77a8186681e617aa7a", + "description": "pan/bi: Add spills/fills parameters", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7246dd88cda034578f1c2dc7f859cec7fadcdb33", + "description": "pan/bi: Implement bi_choose_spill_node", + "nominated": false, 
+ "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ab9abc9052f1fe3b2067e6513114dda3c24decd0", + "description": "pan/bi: Add no_spill flag to IR", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ad8a8499c9b95072d4bb95bed05bad67cf0a7614", + "description": "pan/bi: Stub spilling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8477678cfeb3ab0d80983ec5901742baf786d8cf", + "description": "pan/bi: Fix handling of small constants in bi_lookup_constant", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "82a62a6f3313a20a090806e3f67536e846ee2d07", + "description": "pan/bi: Drop 64-bit constant support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d2bfcba54d8138a848b5d2763a0eb405f18a949c", + "description": "pan/mdg: Cleanup mir_rewrite_index_src_single", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b19d9c86a878d71473d24cc4cabb606a09e35b68", + "description": "panfrost: Drop panfrost_vt_emit_shared_memory", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e6152091ca99c3d8491ac48baa1f301da6f4a4c3", + "description": "panfrost: Use canonical characterization of tls_size", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a33827d3d3186bf7cef5dafef44548fa35676eee", + "description": "panfrost: Get rid of the non-native wallpering bits", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + 
{ + "sha": "c89f659f03ff20a9791e2efdc8f5960965eb1164", + "description": "panfrost: Use native wallpapering on Bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "edd98aac3f16f216fa157f031f4c10fc294e7c8e", + "description": "panfrost: Add support for native wallpapering on Bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a677e34e1fc208d5a9dad3c96e276df3b4e6eb09", + "description": "panfrost: Split panfrost_load_midg()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8892c9cde1b14229a89eb4dfb2adf693b8e9369e", + "description": "panfrost: Pass the texture payload through a panfrost_ptr", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3f91d819450386676ab7997b5c50320c3f248135", + "description": "panfrost: Rename gtransfer to transfer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1b3b289c5c1e996bdb3640dfcfaa516e16da7d0d", + "description": "panfrost: Rename panfrost_transfer to panfrost_ptr", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf3cd28319cbf06b6bc97064dfb0cd054700c274", + "description": "panfrost: Use real name for attribute's unknown field", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6b68c821d11f58d7cec3f48352870c49f439ac40", + "description": "panfrost: Build blit shaders on Bifrost too", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "69c864b0b92da981b169cf879f7718e3d2c458c0", + 
"description": "panfrost: Make {midgard,bifrost}_compile_shader_nir() return a program object", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b83c293674c11de4acec14a45f798cb12566a8e0", + "description": "pan/bi: Add basic support for txf_ms", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d33c8afe9b5773419665a82a9780c3194177bf84", + "description": "pan/bi: Support the case where TEXC needs 0 or 1 staging reg", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4aff27a68a174dc0b8b373dc7da096e295921b55", + "description": "pan/bi: Add support for load_sample_id", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e2d615674222f2ab8d04655156bc49de0769445b", + "description": "pan/bi: Print blend descriptor source properly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "74c158011d0cb8581bdaa337288a084859d89bc8", + "description": "pan/bi: Make sure we don't print special index as a register", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a194dcc827ec441eb5e94b0d5fceaeed349ffe34", + "description": "panfrost: Replace unkown renderer state fields by their real names", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7486b5d91ee0edec013cbc0aa9751ae644e52e67", + "description": "panfrost: Add specialized preload descriptors", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d769697f35c57bc1afdf00036d7ba0d9578047ca", + 
"description": "panfrost: Add the bifrost tiler internal state field", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f1947b39ca5426f1bb501d22cf3dcae9ae411ea", + "description": "panfrost: Fix tiler job injection", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "519643bbe0c600ec473118ccdaef333fa0245d20", + "description": "panfrost: Adjust the renderer state definition", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f2ae8d116ab05419764be444f0615f0310ae9936", + "description": "freedreno/a6xx: Implement user clip/cull distances", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b4224c39e1295c8ed38a6194efde77a874626436", + "description": "tu: Implement clip/cull distances", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "47f825ac6362b522ce0404c7224680c7574bcbb5", + "description": "ir3: Handle clip+cull distances", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9e063b01b77059d13756794f95fa0eb0e5ef6633", + "description": "ir3: Switch tess lowering to use location", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4ca38a19957f88523aef8761d68db413ebb1d562", + "description": "nir/lower_clip_cull: Store array size for FS inputs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cb6ce4a265ba64c172829a3d6d2e13a7d920133d", + "description": "iris: fix the order of src and dst for fence memcpy", + "nominated": true, + 
"nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "6b1a56b908e702c06f55c63b19b695a47f607456" + }, + { + "sha": "d972a6ac4c8e16bc656d87620fdb298aa6059c68", + "description": "nir: get rid of OOB dereferences in nir_lower_io_arrays_to_elements", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "33c9d4bf3152cdfff694a1572012631b120b6731", + "description": "radv: replace RADV_TRACE_FILE by RADV_DEBUG=hang", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9516f9369e4e9f16748afff81c02b9f225c0f124", + "description": "radv: re-order GPU hang report dumps by usefulness", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "719b9b3e27e847c83a4d80faf449ad661ff496e6", + "description": "radv: dump GPU hang report logs into $HOME/radv_dumps_", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ef6d374307b8f2ef0f5f9b92c4c7fae76b0e9551", + "description": "radv: dump UMR ring and waves into the hang report", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "18477241c1d84fb5cbef4285dced7c5900302290", + "description": "radv: add radv_dump_cmd() helper", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf66bbda8095c8e07ae3551235810c51fdae7817", + "description": "ac: add an option to dump GPU info to a file", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5f16c5b6f96640953226f7f9a2da438616a26c69", + "description": "v3dv: properly describe swap_color_rb", + "nominated": false, + "nomination_type": 
null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d8cabe0cbe2d8eb2a2d0abc6cff6717544384d03", + "description": "v3dv: compute swap_rb flag after applying all swizzles", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "40788be13432a8cc9a2e347a208a36c260fcb09d", + "description": "v3d/compiler: fix BGRA vertex attributes for vec2/float size.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c15468d782b91b712032396185be578f13c777f3", + "description": "broadcom/cle: fix vec size dump when set to 0", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "53fc3eb4a223839771a1afa91107c634ca31b60e", + "description": "glsl: Initialize lower_shared_reference_visitor members.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0eccd15852b9f1933166447cee8a8409d90bfe79", + "description": "frontends/va: Return P010/P016 as possible surface formats when encoding", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b50f40fa5a9ea5c807803aa5beccff1ba6fbeb5b", + "description": "src/util/disk_cache_os.c: Add missing headers for open/fcntl", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ddca93ddf70dc76eddb0aebb80de4b19a9b134b2", + "description": "anv: Enable stencil buffer compression on Gen12+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dc22d6b3ab121569a95300ac7ce66679c2cfd927", + "description": "anv: Pass correct stencil aux usage during MSAA resolve", + "nominated": false, + 
"nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "993a2a71224e2063a111ecc1448788dddee0fc75", + "description": "anv: Return optimal aux state for stencil buffer compression", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "815e6c8ef4e39e8735a6f608017e370271c0badb", + "description": "anv: Don't track clear bo for stencil buffer compression", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d34ab5071a866282b62ff3719441592d5346bf26", + "description": "anv: Get aux usage from plane while clearing stencil buffer", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c76ebc0c7a6db701ca18bfa4e6ae86b40d93a322", + "description": "anv: Set stencil_aux_usage flag", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "be2ca24da55974a8129c0981ce7a8277cf5fc7bd", + "description": "anv: Handle compressed stencil buffer transition on Gen12+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c2a41028485d5ca0a67a4396ac7d8e491c688109", + "description": "anv: Return number of layers/levels attached to anv_image", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "67956689bb8d9bb78f62286f94ca0f1783cecc6a", + "description": "nir: Rename replicated-result dot-product instructions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8cee9ce750e7edc6e3b38de09628dba679b3391f", + "description": "spirv: switch to util_bswap32 to improve portability", + "nominated": false, + 
"nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "fb6b243c113a2bab9bd63ea98381e1eeeecc8421" + }, + { + "sha": "7e28fbd8bc558ca8c205fdf0e38c876f4b2c496b", + "description": "ci: Add the new timeout-prone softpipe-gl test to the skips list.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "18cef9c5149523d49c4131000eb6d6b9cf0d04a9", + "description": "docs: Fix \"Hosted by\" link and drop duplicate.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1dd6495f78c361f70d2e75e2d0100dc5ed674656", + "description": "docs: Add a link to the linux kernel DRM docs under \"Developer Topics\"", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f39dac6e891112ba353043763aeeb134c6ce1bf", + "description": "docs: Drop extra link to old DRI wiki in the \"Help\" section.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "31e9de9c8ac72399427cb0fc15f19205dd8182c9", + "description": "loader/dri3: Allocate up to 4 back buffers for page flips", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "16a7cc4d449efa51f9ec2fd47df38a76c12e8e52", + "description": "loader/dri3: Keep current number of back buffers if frame was skipped", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "60585fc4e34858aa277286209f3cf61e83770181", + "description": "loader/dri3: Only allocate additional buffers if needed", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ea326912575fad09af59486ad62d126c4ea0ede7", + "description": "anv: fix 
source/destination layers for 3D blits", + "nominated": true, + "nomination_type": 0, + "resolution": 3, + "master_sha": null, + "because_sha": null + }, + { + "sha": "87934f02f9da94f1a493096049c229b973e4785c", + "description": "blorp: allow blits with floating point source layers", + "nominated": true, + "nomination_type": 0, + "resolution": 3, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e067078fcd2820bf9e1f805f35d9d913acf90f69", + "description": "blorp: identify copy kernels in NIR", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6694b4276b8941cd358ee1f97210210c332eac01", + "description": "ac/nir: abort when an unknown intrinsic is reached", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fa490cb6ac39ca6f37eb73db492d17b84ff7bfce", + "description": "ac/nir: ignore set_vertex_and_primitive_count intrinsic", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b739bb71680cd33a31e3b02e6e66cf154738a662", + "description": "compile/nir: Correct printing dest_type", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "0aa08ae2f673a36709c5485679d4c89a747ec0e9" + }, + { + "sha": "4e2fe34aa9944612f2224188317a4ad2aee8035b", + "description": "aco: fix determining if LOD is zero for nir_texop_txf/nir_texop_txs", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "93c8ebfa780ebd1495095e794731881aef29e7d3" + }, + { + "sha": "4735c8a522c3607058fd8c3162c7376790144b5b", + "description": "nir/loop_analyze: adjust force unrolling to only include interesting modes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"542feb9ab6f22eaef7b6a7f77178c77764c265c7", + "description": "ci: disable check commits job for now", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8f9c5964ab3c7aec362e59a33f0fafd31199a0b9", + "description": "v3dv: handle buffer to linear depth/stencil image copies in blit path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "61b8a49e86d2c863b1132534e982d1cf0ba701f2", + "description": "v3dv: handle compressed image to buffer copies on the blit path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "462429b4b8b8f217ef702592824f037034440267", + "description": "v3dv: fix Z coordinate for 3D blits", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a18411d9c609a852180b7fd08187097afa224ade", + "description": "v3dv: do not attempt to blit from a linear image source", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2118c9b562dedac1b680ea23c98f000b5e6d3d17", + "description": "v3dv: fix multi-layered buffer to image copies on the blit path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0a4fc196058a93f59812036bd615036b1ec4a2e0", + "description": "v3dv: skip unnecessary tile loads when blitting", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c83d6ffa3222b52219406cb6cb4e05232d6fe054", + "description": "gallivm/nir: handle nir_op_flt in lp_build_nir_llvm", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"91b64da3de512a207f08fc4700178a3e2524682e", + "description": "driconf: allow higher compat version for Brink", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "edd0b00c2befc42966145e3a93937fb707f83470", + "description": "driconf: disable GLX_OML_swap_method by default on Brink", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d2cb3f862f20442e477e15637237c92253f6bf07", + "description": "driconf: add a way to override indirect-GL extensions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7eedc79658d47bf336a0efa1074634d9e5f4fb2f", + "description": "driconf: add a way to override GLX extensions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "97f6d5e3a99d7093ec732dbdc319159bb1a1b0d4", + "description": "glx: let users force-enable/disable indirect GL extensions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e19644967f20a6baf82c5ffd6973e2b25fac92b3", + "description": "glx: initial plumbing to let users force-enable/disable extensions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "74722c3ab910921296a87596ea6d7460e1f75818", + "description": "glx: rework __glXCalculateUsableExtensions to be more readable", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1331b86299b2404eb231b1ae1c6564de5c80e466", + "description": "glx: stop using hardcoded array sizes for bitfields", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"af25f47bdce187e76789fa9983096516e25998c0", + "description": "glx/extensions: split set_glx_extension into find_ and set_", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4ba255dfaaf39ec9835ecb244aa7bf5a2362b5ab", + "description": "dri/DRI2ConfigQueryExtension: add support for string options", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f9763b20542619bec57163ae45480626c76d4378", + "description": "driconf: initialize the option value before using it", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0e3a424f002d04a319dfdfc363d1774790bc1193", + "description": "driconf: bump the maximum string size from 25 to 1024", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "81797fc863c5eb4ec976e8839845926d89a16be6", + "description": "android: util,ac,aco,radv: Cross-platform memstream API", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "af8d488ea5e13219c67c132a3edfa2e7a698746d" + }, + { + "sha": "025050bae73d0598d788e3c307328670a3bf51c1", + "description": "glsl: Initialize ir_if_to_cond_assign_visitor members in constructor.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8cf00244328c1697b93ed0eb57e009f283d03269", + "description": "nir: Use a switch in nir_lower_explicit_io_instr", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8033200560f6d76b1f84480cc0df809112389686", + "description": "docs/v3d: Add a little stub of v3d documentation.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, 
+ "because_sha": null + }, + { + "sha": "97dd039191d2df4e343ef5b5e5cd18d71fb4cba0", + "description": "docs/vc4: Add information on the hw documentation available.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c4290a52ddbe11a5e78179392ca47467b17a46ce", + "description": "docs/vc4: Move my old vc4 wiki's documentation into docs.mesa3d.org.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4a4b854aa0286bbd3eede784a915e557bcfa138f", + "description": "docs/vmware: Move the vmware driver docs into the drivers section.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "313f951f1bd5197f376f851c0f6b4414a09f92ab", + "description": "docs: Move the gallium driver documentation to the top level.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9a644d701730cbec1a8f4d7caa94a72f21ea8e72", + "description": "docs: specify redirects in conf.py instead", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "47f52e83d1074b3833a4a4c98b87ac23c04a38f6", + "description": "docs: specify redirects relative to docs-root", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7daf953bc0e67f4f205560138f8fdedb2ec6ca57", + "description": "docs: verify that targets for relative redirects exist", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a85c3189fa7f5b2a40b49ac953d88cf8c8ca8206", + "description": "docs: create leading directories for redirects", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": 
null, + "because_sha": null + }, + { + "sha": "b2c737cf57382d543002177b6e4810b19ab62c74", + "description": "virgl: Fixes portal2 binary name in tweak config", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "9760a7ed9138b84d83a712afeb0e673399d9e200" + }, + { + "sha": "3d51c27de5576b9a62761726f33b0cc7a66da1d2", + "description": "st/mesa: initialize lower alpha func to ALWAYS", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "33ccf0e9bc5cdfbc9b2bd821c7a31d48043a8cf8", + "description": "nir: drop unused alpha_ref_float", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "42ee423e3ac1a3d24d55c9044cf3d98be0280811", + "description": "nir: drop support for using load_alpha_ref_float", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1404b8b1e5b6c517851d3296feb659c1fa4cfdef", + "description": "vc4: do not report alpha-test as supported", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8ad931808e2d3f54b4bf2ddbbaa632be1018801a", + "description": "v3d: do not report alpha-test as supported", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4c41d1900e58cb09501cbf113568645e70634aff", + "description": "ci: Add jobs running ci-fairy checks", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7b09d501e4511d0213f1a52c1fc09d7bede3ae85", + "description": "ci: Add empty needs: to pages job", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"9d1d3a89c49b4afab3e6ff7fcca2ac14347d4dca", + "description": "ci: Move test-docs job to deploy stage", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "549ae5f84375dfadb86cfd465f0103acfae3249f", + "description": "st/mesa: make sure prog->info is up to date for NIR (v2)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1126b5cd2f14776b1554591e5cc238e05882b5ef", + "description": "Revert \"st/mesa: don't pass NIR to draw module if IO is lowered\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "493fdcf44647ee471e934de3e63d193c6a3b6ff0" + }, + { + "sha": "233520035a0cf3bee7ed58de6a4fad6cc7e097f0", + "description": "nir: consider load_color intrinsics as both inputs and sysval in gathering", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c69849ef803bee8296c961df355499b42c76e973", + "description": "amd: update addrlib", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e6e3d8951ab14cb4f4f764e9957297485bc12d1a", + "description": "glx: Move glXGet{ScreenDriver,DriverConfig} to common code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ce8530d6c9292d5a0c82522e5fd05831403a9c70", + "description": "glx: Delegate the core of glXGetScreenDriver to the GLX screen vtable", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3bb7ebfc7504af0e85596f3ad4b72a2ede10d227", + "description": "glx: move __glXGetUST into the DRI1 code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + 
}, + { + "sha": "eb6877d3af337f76143755606d4a7aea132526c7", + "description": "radv,aco: fix use of texop_samples_identical in the resolve meta path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eacdad7ea0df5d9804282f697b3cfeebf88a1325", + "description": "ac/nir: do not sign-extend the result of texop_samples_identical", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "e690a1b78bf902e0f39174ccef8a8caaa2fe2f6e" + }, + { + "sha": "11d70e7ab963aa96c6c61af8a011de647304bed7", + "description": "docs/features: add v3dv driver", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "354e375c9cc937ecfafd7b98a768621c53458528", + "description": "frontends/va/postproc: Un-break field flag", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "78786a219ea2322af09576472dcc2d6d01cb9060" + }, + { + "sha": "fd038132de01c7bc37ccbc96fdc556050644a09e", + "description": "aco/isel: Miscellaneous cleanups using the new Stage API", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "34bc9477de18a92e76ea7c536940a631323a83b6", + "description": "aco: Clean up symbol names and comments related to NGG", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "86c227c10cb729aef05bc8a26b56957e3fa7683b", + "description": "aco: Use strong typing to model SW<->HW stage mappings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fdbc45d1d46341b7efd596e5fb5ff9d242f649da", + "description": "nir: Only validate in passes that might have changed things.", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c54c42321ea5a3d9a09bbe89c00346f8c26b9300", + "description": "glsl: relax rule on varying matching for shaders older than 4.00", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "18004c338f6be8af2e36d2f54972c60136229aeb" + }, + { + "sha": "c97e82bab50a2ec2d9d6e73a68c59c544526d01c", + "description": "panfrost: Only enable occlusion queries when active", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "04e955f19bb760b88acad414425cd4708a58e58e", + "description": "panfrost: Precise occlusion query support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "584b1070372a0e010a0e9a221493e5042575abb1", + "description": "st/mesa: Drop the TGSI paths for drawpixels and use nir-to-tgsi if needed.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "57effa342b75a2ae681f2a7665925022dd6e4aa9", + "description": "st/mesa: Drop the TGSI paths for PBOs and use nir-to-tgsi if needed.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "da4c4c0e6785d48b6aa053766e73e80df292ce82", + "description": "gallium/ntt: Add default compiler options for non-native-NIR drivers.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f5841c343b44709cc8820b00e2fd411153a34931", + "description": "v3dv: Fix assert using assign instead of compare.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "ca86c7c65a824d6dc4757b0dba82490acc85cee5" + }, + { + "sha": "e7ce74497e5f1847eaf78002a37571866fee4097", + "description": "radv: 
Advertise VK_KHR_shader_terminate_invocation.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf955d5f0a3590b05f8949cb5858dcd9a36043c4", + "description": "amd/llvm: Add VK_KHR_shader_terminate_invocation support.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "76421667ecfe9b5f64dfb756769d1f12f4995935", + "description": "aco: Add VK_KHR_shader_terminate_invocation support.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "22a6396340ce9a0c56d55eade3bb2370ae8a8bf0", + "description": "ci: Enable Werror on meson-arm64-build-test.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b03fdca2e003d4b866bf2a2d14b1a475588e0500", + "description": "turnip: Add error path handling for descriptor pool init.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d384f3be4c0c753c38498f9cf8f66dedafda0024", + "description": "turnip: Handle the error path for tu/drm's vkResetFences().", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "296468ef1ac5bef98afaa2ebc141e3bc53af2667", + "description": "turnip: Handle some error paths in allocating CS space from a command buffer.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9b156ef57bf9a5eeae89b3222bf6b321faa2471f", + "description": "freedreno/fdperf: Silence a compiler warning about current counter.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a512e9eecd80a171626d3651b9d9e1f2e31dc147", 
+ "description": "freedreno/tools: Fix compiler warnings about using sz in the error paths.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "91c5bbc128b13e14ecccd06d6004c364bc2ebc6e", + "description": "freedreno/cffdec: Fix format overflow warning.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e4dba528cad65ab3a7ef514abd3598bf175c416a", + "description": "llvmpipe: enable CL images", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cda192dc1579d260348c3ed0fccdd097e0544eeb", + "description": "llvmpipe: fix sampler/image binding for clover.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "874371876e2faecf62e057016f35660dd6d8761c", + "description": "nvc0/CL: enable images", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ee035c75d42606a18ad54bd45ae1a751c421c98a", + "description": "nouveau: hide SVM support behing a variable for now as kernel space is broken", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1a775b71ca588a3b9ffc15b5d892739fda41e5d3", + "description": "clover/nir: set kernel_image cap", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4f24dee22a7036f90487ca01fc25f7122767cbed", + "description": "clover/nir: Add an image lowering pass", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5e31fad8c9c07d984837a07010c6a9dcd2a2aa97", + "description": "clover/nir: Calculate sizes of images and samplers 
properly", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6c46e8408ba0d3a14da113b8db679616f22b04b", + "description": "clover/device: use PIPE_MAX_SHADER_SAMPLER_VIEWS for max_images_read", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "43a42b6e1d063ba86cd9af342b2d3a9768bfae8b", + "description": "clover: clCreateImage: calculate image row_pitch and slice_pitch when not provided", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f2bdb69218829551f7c1f47a77ace122c9da619b", + "description": "clover: support custom driver strides", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9583ce04db3ac4d892eedfcc635c86eca41958d4", + "description": "clover: validate image_row_pitch and image_slice_pitch in clEnqueueMapImage", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1ff4db1935111593540dd0e1341cb89aa7a67b36", + "description": "clover: Fix incorrect error check in clGetSupportedImageFormats", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3aead7198bcbb5ed0b58b3c84119f1ce28793f70", + "description": "clover: use pipe_image_view for images instead of set_compute_resources", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "eb965719abfe740a09c839565a3f0d9f286be130", + "description": "compiler/types: Allow images and samplers in get_explicit_type_for_size_align", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"0021d3ae87a5c27d084de6ecb6cd99235466774b", + "description": "compiler/types: Assert non-zero alignments in get_explicit_type_for_size_align", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ef68f740a67fd1824e44a67d1023e7937fce0cf3", + "description": "nir/lower_io: Assert non-zero power-of-two alignments", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "589d918a4f9da83e3ae441aa674372a141d23349", + "description": "spirv: Add 0.5 to integer coordinates for OpImageSampleExplicitLod", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "701cb9d60c0350b0134e52cb1b51eefbbf27bd22" + }, + { + "sha": "2015a109ff287cdf04607d5acc540aae2e87daa3", + "description": "anv,iris: Use the data cache for UBO pulls on Gen12+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cdc546ae7fe7477d97cf7b09ac8db0e44a3cfc45", + "description": "iris: Flush caches based on brw_compiler::indirect_ubos_use_sampler", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fa483d8cd1bc975c339082435a2e0f657959ea39", + "description": "android: gallium/auxiliary: Deduplicate nir_to_tgsi.c inclusion", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "d0f8fe5909107aa342f62813ced9ce535ed6da32" + }, + { + "sha": "0f82c99c4e62d999efaa4d9355c151d00d1931ac", + "description": "docs: Document how to build and install Android drivers.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c4f8d421df2e62bf02063906ce722f3fe25cbea7", + "description": "meson: Don't enable libunwind by in 'auto' mode on Android.", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f2a27d72de5bf1452f187779f5a981bb89e3ae8c", + "description": "meson: Don't try to build GLX by default on Android.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e3c330c7b21f36aef872b130e5f79a9fcaf99506", + "description": "freedreno: Use Android's libsync instead of libdrm's.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "81a0f1eca24b8605b9f2506122ff90db7129c39e", + "description": "meson: Only require libexpat when a part of the build needs it.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "442a7696004c5998d84a3df07d37fa9064dba214", + "description": "meson.build: xxf86vm is not needed for -Dglx-direct=false", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d0f8fe5909107aa342f62813ced9ce535ed6da32", + "description": "softpipe: Switch to using NIR as the shader format from mesa/st.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6444f5702dcac87c41e9c7af66c1762c7a48f3a0", + "description": "softpipe: Fix buffer overflows in SSBO atomics.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "34cc6a804ec92b5e74cde5b346828874737276a8", + "description": "gallium: Add a nir-to-TGSI pass.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d867e7c97482ee4682b59c5d17ef42232b480f36", + "description": "nir: Add an option to not lower source mods for f64/u64/i64.", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c730feacc0dea9749d8ff070b2d45fcd0cfeaf51", + "description": "nir: Add a call to get a struct describing SSA liveness per instruction.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a206b581578d585d845250f62dfb1e6684ddf2f0", + "description": "nir: Add a block start/end ip to live instr index metadata.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f5d18403a4d51a2cd927c141884361850bad41d", + "description": "nir: Replace nir_ssa_def->live_index with nir_instr->index.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b6cb184e86dd381dcc1c90c8273b92ddc6d51ed8", + "description": "nir: Introduce nir_metadata_instr_index for nir_index_instr() being current.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b05c107d74487c7e60d8fb44b9375b6551b334ac", + "description": "ci: Enable NIR_VALIDATE everywhere.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4ca103077454beca6d94321a050e2dc27880e0ea", + "description": "radv: move all NIR pass outside of ACO", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9aa89b36fc413c34a14dfdb47fe7792d43cce7cf", + "description": "ac/nir: handle non-const offset with txf/txf_ms", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e690a1b78bf902e0f39174ccef8a8caaa2fe2f6e", + "description": "ac/llvm: don't lower bool to int32, switch to native i1 
bool", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ed0eb511d9c342f1cd308a14a82782c965e06ffe", + "description": "util: Fix rwlock Windows include for MinGW", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "afeb0c30226c38142ce37ed3f1ddfd87892a057a", + "description": "genxml: drop gen10", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6d32fcaaafb4b26b2459bd01380eea0a00fdd3da", + "description": "Revert \"radv/aco: disable NGG GS support because it randomly hangs the GPU\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "b84d1a0c42c5a1973ebc53a49fd941590e540a82" + }, + { + "sha": "d8435c1628ce6659e28c0f0625c9af42f3a19860", + "description": "aco/ngg: Add assertion to make sure we always know the vertex count.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d9cb9ff41489bb56a6e2ac109d93db2d5bb7a377", + "description": "nir: Emit set_vertex_and_primitive_count for inactive streams.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "07fae31e8cbe216884be6ecb826cbb5fda55c3bb", + "description": "radv: add missing 'discardtodemote' option in the debug list", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "440813114281bfd05c2b0d26c6488d1410790be4", + "description": "vulkan/wsi: fix possible random stalls in wsi_display_wait_for_event", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "da997ebec92942193955386535813374286b7fb1" + }, + { + "sha": 
"85053c016c898455434f6264981a2ddd4b60f04e", + "description": "panfrost: Fix stride for AFBC_FORMAT_MOD_BLOCK_SIZE_32x8.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "8bb1d61f276c3ee8e546c861b86cd6e6f3dd1b6f" + }, + { + "sha": "8dd03a7c125f8e7f3d29d0fd131f7129cddb5244", + "description": "anv: Advertise VK_KHR_shader_terminate_invocation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8251eed83e72317db3f0fc2d5edad4e42b136561", + "description": "vulkan: Update XML and headers to 1.2.158", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7e55f0e17dfc11717513fee1f62772de49eddb66", + "description": "clover/nir: add a constant folding pass before lowering mem const", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aca31baafc09634d69134d7ccbdfe6d426cbbbff", + "description": "isl: Enable Tigerlake HDC:L1 caches via MOCS in various cases.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "02fe825a61344a065260da40b72852cce800ac01", + "description": "isl, anv, iris: Add a centralized helper to select MOCS based on usage", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "103ad427bc7f61015cea65f71486355644c276b3", + "description": "anv: Set only one ISL usage bit (RT/texture) for CopyBuffer sources", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4df98c3c0ca14b3fc6468ef792bbb7484d1e8c47", + "description": "turnip: Only link libdrm in the DRM case, not KGSL.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "f63ce9bbe0c7c0157dd4ef22acfe9f412a8ec0b2", + "description": "turnip: Don't link the WSI code if we don't have a WSI extension.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8f3313fb4797bfbc597654d045fff54948749dd1", + "description": "turnip: Use Mesa's libsync.h instead of libdrm's libsync.h.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8a72666e91bd14e8ff1b8246ba636189abe8ddf3", + "description": "turnip: Drop a dead error checking path in device init.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "27b888794624225343287e57f5fcea63f94850b3", + "description": "android: Add pre-4.7 Android kernel compatibility to our libsync header.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e8c89a9bcbaf85bd8772991151cbef7c0a2ddd76", + "description": "util: Import a copy of drm's libsync.h", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ab57df26225ef99f91e179dc53ebeabfaaaf92fa", + "description": "ci/android: Switch build to using platform SDK version 26.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e92f4ac9f46848a5d0eab482b867c1f3f1c15b68", + "description": "android_stub: Update platform headers to include gralloc1.h.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "72799886e7e10202e2756ad93fd93d59b11f900b", + "description": "radv: report latest extension spec versions", + "nominated": false, + "nomination_type": null, + 
"resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cf17d6251653f4a98e7c4f904ea2f0bc0ecedd5c", + "description": "meson: Add xcb-fixes to loader when using x11 and dri3. Fixes undefined symbol for xcb_xfixes_create_region in loader_dri3_helper.c", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "25627ffe656339da9477d47039d3ad561dd70fd3", + "description": "mesa: Pass the correct caller string to _mesa_lookup_or_create_texture", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9b847d4150fff3e8871617b2c0f971127bf71280", + "description": "mesa/st: Silence unused parameter warnings in st_context.c", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4cd930565e3887f52e2bedc3040daa14df0ff8d3", + "description": "i965: Silence unused parameter warnings", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3c87ac1f60875b5bbd4facca22fc426ee747997a", + "description": "isl: Fix the aux-map encoding for D24_UNORM_X8", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9a062383e35f812410fe025930b016b4b67710e5", + "description": "anv: Implement VariableDescriptorCount", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "215218f32f50136534cc37eec9bada2b4b6d1f95", + "description": "anv: Add a descriptor_count to descriptor sets", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dd1971c9d8f7ea80a52538c04a82f6166d96b952", + "description": "anv: Bump the number of update-after-bind descriptors to 1M", + 
"nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3c0ba7b1802bf231ef3f0a7bdbeb474be99a2aed", + "description": "zink: reject resource-imports with modifiers", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "d686835171c44cf325306f6c87408bef999f5496" + }, + { + "sha": "068b4bf76c03a983982be8af13daf228be4531ce", + "description": "doc/features: remove zink entries for GL 3.3 items", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "af8d488ea5e13219c67c132a3edfa2e7a698746d", + "description": "util,ac,aco,radv: Cross-platform memstream API", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4c2e7b08930654cff115b1a4a22abc851dae958b", + "description": "util,radv,radv/winsys: Cross-platform rwlock API", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9d4999e7cf7ca530b3e99a1923f1df9e2069cd34", + "description": "anv: Ignore continue flag in primary cmd buffers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b3529e56b5d98b7cae22d57f7c474092c94f7f65", + "description": "ac/nir: implement missing nir_op_pack_half_2x16_split", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "2f125908b358c2423aa34d2673ec04aa546b971a" + }, + { + "sha": "57d317865e7bee02a17efcde8beeb6a220f900f1", + "description": "radeonsi: enable NGG culling by default on Navi1x PRO cards", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "67a67ea29e7177393c3ba9f70c5b7207862065ab", + "description": "radeonsi: enable NGG on 
Navi14 PRO cards", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bc3c74e2a296b48c4473a37c863e9df3005355fb", + "description": "radeonsi: tweak LATE_ALLOC_GS numbers for faster NGG culling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b38b5aabee4a106ca3a071ed6568a2a0bd4bd977", + "description": "radeonsi: pack LDS better for NGG culling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8094367988932b071ec482fe717eebddeaaf58ca", + "description": "radeonsi: write VS/TES system values into LDS after culling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1de0bf0a56d630fa1d070eaff55c40053032cd6b", + "description": "radeonsi: remove indirection when loading position at the end for NGG culling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "823ee12d57edf2a414f25f3bfc91caf5e576589c", + "description": "radeonsi: optimize out LDS bank conflicts in the NGG culling shader", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7648060dc03775979e3fa8904c4948c084e82b6a", + "description": "radeonsi: enable NGG culling by default on gfx10.3 dGPUs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e7d3f22c6ae532e25f26c0f2740fa9ad88794d71", + "description": "radeonsi: don't disable NGG culling on gfx10.3", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6810e6e4d0e321fc45f55e984912b824fe2a1b28", + "description": "Revert 
\"radeonsi/gfx10: disable vertex grouping\"", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "a23802bcb9a42a02d34a5a36d6e66d6532813a0d" + }, + { + "sha": "7d1fb5fffe346dacda896f9feaa7df4695a1d543", + "description": "v3dv: Initialize time before usage by free_stale_bos.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "f78c99f357eee08926dcf9434c3944707837fe34" + }, + { + "sha": "8eb246d7f6137c6d1783c187d37e415e61211d2d", + "description": "v3dv: Remove unsigned comparison to zero.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5e27e0432274a29e75f9267269a2b367eda33cbf", + "description": "intel/isl: Drop redundant unpack of unorm channels", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "09ced6542049986f7fe52af8087aec9fc23d9f16" + }, + { + "sha": "cf11ebfbc22b5ebabdcedac0e045deb0f63ecff7", + "description": "st/mesa: Add missing sentinels in format_map[]", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0c70268ff73adfc9d588fa5a10c500ed9f66bab6", + "description": "zink: mark ARB_sample_shading as supported", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf9a1e0a4be96b5f74d792596b10394ca9092703", + "description": "zink: add a pipe_context::get_sample_position hook", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b009bd968599ea2a6c319c2750b0b12b15582e8d", + "description": "zink: add ntv handling for ARB_sample_shading", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"441b32bf3f4ea79b90f5c4de4ffab180dc53d2f8", + "description": "zink: add helper for vec-type input variables in ntv", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7c5129985bcac75053823a31674e8a1e2629230c", + "description": "zink: implement ARB_draw_indirect", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3e3b6e995fa44ab778451c1fc24c2caabca4299f", + "description": "zink: set 3.3 complete in features.txt", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "80c66642cc88dcd06432ea5d49bb768c3cd9f06d", + "description": "zink: bump GLSL to 3.30", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "44baddf1fe995d25762e048ddf5cf10acdcd718a", + "description": "zink: set 3.2 complete in features.txt", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "26b076ff66f0c95a63a7afb048d4adbe39e76c50", + "description": "zink: GLSL 1.50", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b27ec49e0220b1a71f24fd0e451dbee305ab0877", + "description": "zink: mark off GL 3.1 as done in features.txt", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "815f0c01b0f373848784a8f4c7ef5a9026050413", + "description": "zink: bump to glsl 1.40", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8028991f361f22f20abdbed0c2f61f7c5c23c052", + "description": "zink: enable gs pipe caps", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "4c734da7defb6869af4f2f509782b95eb0180024", + "description": "zink: add gallium handling for geometry shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d59e4b8ce6497097954028df2469fc7b404a106b", + "description": "zink: round out handling for streamout buffer stride setting during draw", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9915c540b269943907874a666911a9efb2ea4303", + "description": "zink: remove ADJACENCY prim types from primconvert path", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d656c600a399e09a80dbec4c03e5d8e122904521", + "description": "zink: add handling for gs in ntv", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9671cd9215150d8eeecfe0868dbedba3493f8b5a", + "description": "zink: re-transform gl_Position for gs input", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ea2279daf1f561acedaae90959d23888cf754d5d", + "description": "zink: add ntv handling for geometry shader variables", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "423a342ae7486173fc7ef26a3776de1aafcf91bd", + "description": "zink: handle shader io vars more generically for use with gs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "182f7f9ae8e492355d6e98bc88d52a761b2b2996", + "description": "zink: add some spirv_builder functions we'll be using for geometry shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "5934fc170841f9dd46fdc71b794e7c468e4945ff", + "description": "r600/sfn: Update state docu", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9f5e5fb912203c0ae6de242144a91da8a7e41c8b", + "description": "r600/sfn: Fix the parameter component type", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5f661977f8e66bb558eb43e9b72719749d86db03", + "description": "r600/sfn: Use register keep-alive also when scanning the shader", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9eda7176e314712cae2468313b5c83b5f2a72f1d", + "description": "r600/sfn: fix remapping of deleted attributes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f79b7fcf7c7f5db626efdb63f27e8bc64d0aed77", + "description": "r600/sfn: use 32 bit bools", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "92a4d45bee61c29256b592e236b04852d101a90a", + "description": "r600/sfn: use cacheless op for coherent image write", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "02e39b737a7c74486a7e66b9bb46eca63dbd1a75", + "description": "r600/sfn: Handle mem barrier and image barrier by using ACK", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f664fc1c7b54e90a0bff5dd25be0381b6224cbbc", + "description": "r600/sfn: use shared pointer to GPR for FS sysvalues", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"876b975becb25236c986c4055b0dd4cfd79a0230", + "description": "r600/sfn: Rework get_temp_register to return a smart pointer to GPRValue", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "92bc0661f5ced456116812b4416f1eea916a14b4", + "description": "r600/sfn: fix mega fetch count for SSBO/Image atomics result fetch", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d79d20d398a809d05f6d5ea45ce54ca0b9065a4f", + "description": "r600/sfn: Fix keepalive patch", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "86254134b38a7337c6685e9c499ad08eb8ff2c89", + "description": "r600/sfn: Fix ssbo resource offset for buffer loads", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ed66eafb6d811d2f53f25e69bee006489a63fead", + "description": "r600/sfn: Don't reuse registers for workgroup ID and local invocation ID", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bafb2bb5516ce17f9636a7d4069aed0053e41981", + "description": "glsl: Initialize add_uniform_to_shader member var in constructor.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aea74eac3d7706ed8d870504b163356e3f104a4c", + "description": "zink: fix stencil wrapping", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b17e264e66003a60d3dd779f6cece9c863c92826", + "description": "glsl: Initialize lower_ubo_reference_visitor members in constructor.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { 
+ "sha": "e32eadc586a547054404dd6d943bfef48ca52df2", + "description": "v3dv: Fix assert using assign instead of compare.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "c75846e674e87ef545a9303e572388b29313ac3d" + }, + { + "sha": "e7e24d5039cca6c1254935ea8067e7c925b04c51", + "description": "intel/fs: Handle nir_intrinsic_terminate", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "886d2d1a9abcb0572a957c24ae44de4d6c055bc0", + "description": "spirv: Handle SpvOpTerminateInvocation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4dfd2923079ca8cbc1216538a9584ce42f484680", + "description": "spirv: Update headers and metadata from latest Khronos commit", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f6d5dd825f3f13cee2b3b170b4242290dc2989a4", + "description": "nir: Add nir_intrinsic_terminate and nir_intrinsic_terminate_if", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fdb65b8b2373df7512290a0e4115dac3eeec8f10", + "description": "aco: add missing SCC clobber in get_buffer_size", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "fcd6d8324560b5897586cbf8161f9b46bff5d11f" + }, + { + "sha": "6be2bbec6da17fcc1f0ef4a474eaefd656b6214b", + "description": "Revert \"iris: Use the data cache for indirect UBO pulls\"", + "nominated": false, + "nomination_type": 2, + "resolution": 4, + "master_sha": null, + "because_sha": "3b784370c376dbe642c2a8232169363d4fda0c02" + }, + { + "sha": "a5529eb235bcff2c677fb6a32ae633d3b4d6700d", + "description": "anv: Go back to using the sampler for UBO pulls", + "nominated": false, + "nomination_type": 1, + 
"resolution": 4, + "master_sha": null, + "because_sha": "b54d37a8676acbd725ef1817479f2630d3ea95be" + }, + { + "sha": "d91cb31a2aad30c80c88dedc7b75853470266979", + "description": "vc4: Enable nir_lower_io for uniforms", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "96d99f2eccb4bea4b9ede38f9b2e472cdc617e99" + }, + { + "sha": "3e877ca27dfb2f0e1acff6e86fa886e737aebdd4", + "description": "vc4: Add missing range_base/range at nir_load_ubos in yuv_blit fs.", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "f3b33a5a35e605101d45213bddf52f2f800a52bb" + }, + { + "sha": "4cfdd425b65411a3a558748b4c2041783e95aa18", + "description": "vc4: Add missing load_ubo set_align in yuv_blit fs.", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "e78a7a182524f091e2d77ba97bfbe057c3975cab" + }, + { + "sha": "f91b2fe384a38d74df014c6421156a9f8b0bb78e", + "description": "nir/opt_load_store_vectorize: add some tests for discard/demote behaviour", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f8e971f51142195533e201ea45e33afaaaca1eac", + "description": "nir/opt_load_store_vectorize: don't vectorize stores across demote", + "nominated": true, + "nomination_type": 1, + "resolution": 1, + "master_sha": null, + "because_sha": "ce9205c03bd20d26af23ca891e97a9f848a612d1" + }, + { + "sha": "19aaafd4a667fb843a1c7727de23ff87179d9eb5", + "description": "intel: Remove Gen10-specific device entries", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b0e3af4fb821c75b7a07506fed93a2c8c0134f67", + "description": "intel: Remove Gen10-speicific perf support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"fc04733f09303089523a29f07779b3c9870e82a1", + "description": "anv: Don't generate Gen10-specific functions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5490f5cbceb46d6424ee0b8a89c84b7f3e6ab55a", + "description": "iris: Don't generate Gen10-specific functions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "495dc514ac88314e3909c333d6f3564d671c8c6d", + "description": "intel/isl: Don't generate Gen10-specific functions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1f00b0f4efac5438bb2c4896288e3b48b679fb1e", + "description": "intel: Remove Gen10-specific cache config code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9c19a3874d862579292291ffec5954a610702efb", + "description": "i965: Don't build Gen10-specific files and libraries", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3220ff70f3889d9a1523706e4e83de86e20b19e9", + "description": "i965: Remove Gen10-specific state setup and workarounds", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "262ca98b3a45075800d88abb9a069d8c21578330", + "description": "intel/compiler: Remove Gen10-specific code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e22f106e017ca23ad3cf21e9f705fdb8dbee65ed", + "description": "intel: Disable all support for Gen10", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "112f23fd332039879c19ebe0ce9e0b96e0dd0191", + "description": "i965: 
Rename gen10_emit_isp_disable to gen7_emit_isp_disable", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "fe261091238135490b87a43fa050b1396c15281e", + "description": "i965: Make MOCS index tables static const", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "02a0819dccaf55cebafb85aaafbd06619520bfa3", + "description": "i965: Silence many unused parameter warnings in genX_state_upload.c", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "5a5f30bf434f9ab160403930ff57cc116bc39f4f", + "description": "i965: Silence many unused parameter warnings in genX_blorp_exec.c", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b9c326bc4dd8a78fa30a919a405be1d0b05a0bb6", + "description": "intel: Silence many unused parameter warnings in blorp_genX_exec.h", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "de77fabe132d63fa3ae951e7d48e89b74ef75e89", + "description": "i965: Allow viewport array extensions with allow_higher_compat_version", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9223842c6b3b4082059f3200d0b74c708bde6cfa", + "description": "ci: Unskip fragment_ops tests on Bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "cdfb1d925f388861751ce04a0000141013e1c704", + "description": "zink: add last few format maps for ARB_vertex_type_2_10_10_10_rev", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"8e96b4f430f0af2a0712b721ad9308b267888fae", + "description": "zink: add VK_BUFFER_USAGE_INDEX_BUFFER_BIT to vertex buffer creation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c364124440c7e08e4e092beb9a1d064838167994", + "description": "zink: handle null attachment for ARB_texture_buffer_object samplers", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "69dc0f2f7490920fc16dfa8b6a33b6c2acc8b98b", + "description": "zink: assert valid format in zink_create_sampler_view()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "00db994d9687b17f70c1c0602f723887b48bf6cd", + "description": "zink: ensure resource tracking for sampler buffers in render batches", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "63b299d013465ebf5e9331a0fd35ada5e166ac0d", + "description": "zink: implement ARB_texture_buffer_object", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4c2b02f5994efb0e25aafc8ff816b03dd6439651", + "description": "zink: increase descriptor pool sizes for other descriptor types we'll be using", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4231cc2e99a1a8628a62b27963127229e056d227", + "description": "glsl: more accurately handle swizzle in 64bit varying split with no left value", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d75d12f5073fdc92a30f9c2335648f7ea97c7d03", + "description": "aco: don't use v_pack_b32_f16 if 16-bit input denormals are flushed", + "nominated": false, + "nomination_type": 
null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d4b3e869ee164d7beabef43193d4453290f243e0", + "description": "aco: propagate literals into sub-dword pseudo instructions on GFX9+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1a652244e4bdc0cefa907a91c81ab1efe1eafbd3", + "description": "aco: implement 16-bit literals", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "91d9c55f3a435717224dace90b6181833ca9ea8e", + "description": "panfrost: Add blend shader support to bifrost", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f04e5ef7ff1c1d0dbd53ef3b2d90657beca769fd", + "description": "panfrost: Add missing tile-buffer formats to the format enum", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "685d45ff938f004aebffb06e8766ee9027030167", + "description": "pan/bi: Special-case load_input for blend shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0d40460757b809c8c28ee78f79e87a01e79bc518", + "description": "pan/bi: Reserve r0-r3 in blend shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3432d0a3e53896348d840176722be46d080ad439", + "description": "pan/bi: Special-case BLEND instruction emission for blend shaders", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f3f5da91dee2aa9727e7e2244a262584b0d9c02", + "description": "pan/bi: Collect return addresses of blend calls", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "8da0a1d5fd4ed256e9cae0d0e61493be3efade85", + "description": "pan/bi: Add load_output support", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c7748968ba0fdad324f561e0219fdb6812d7df2c", + "description": "panfrost: Flag blend shader function as an entry point", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6d3fce56801936ad66b540912f6e1593177b62b8", + "description": "panfrost: Scalarize nir_load_blend_const_color_rgba", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8d707cd91833f50a1111bf4481378ee99069befe", + "description": "panfrost: Add a \"Bifrost Internal Blend\" descriptor", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e6186c204266cf4f9b6c94604009cc7bfe3b6193", + "description": "pan/bi: Support indirect jumps", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1a1d9cce465346d64da70e4439e40846ad02d70c", + "description": "pan/bi: Add support for load_blend_const_color_{r,g,b,a}_float", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "16179c89d118144b4435d9ad30e67d5fb7d923ef", + "description": "pan/bi: Rework blend descriptor access handling", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6dd2a76126f6dfa43930d9d95b7d61662a4a7ec5", + "description": "pan/bi: Get rid of the regs argument in bi_assign_fau_idx()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + 
"sha": "f25850bf5f9e83b2ae00ef283caad3c35d2fb8cb", + "description": "pan/bi: Use canonical name for FAU RAM sources", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "111cf7f0e82ec28aad9a3211cd252e9cc04f6dc1", + "description": "pan/bi: Copy blend shader info from compile_inputs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "6c61f0b8e5d1c37192fef9de83201653cc998de5", + "description": "panfrost: Extend compile_inputs to pass a blend descriptor", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d8326ceafbd8903b69e211ec6449ecdb83efc761", + "description": "panfrost: Fix fixed-function blend on bifrost", + "nominated": false, + "nomination_type": 1, + "resolution": 4, + "master_sha": null, + "because_sha": "8389976b7c09d6bb7ffb9964ec8748ecc3a9862a" + }, + { + "sha": "442f48f27b666cd2183d4ce27977da045ee34b0f", + "description": "v3d/compiler: implement load interpolated input intrinsics", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3ec165bce99dfc58f5d023d9dc853c71f36a3f74", + "description": "broadcom/compiler: track partially interpolated fragment inputs", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "71ed8c5aa69c3edbd6ecb1e658824e6954b36d58", + "description": "iris: Fix doubling of shared local memory (SLM) sizes.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "341f5bffb78d05e4c3d16621f9d5de05f5149be3", + "description": "intel/compiler, anv: Delete cs_prog_data->slm_size", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + 
"because_sha": null + }, + { + "sha": "e881290979d3ef0d26cb772e7aaf4b59a512001c", + "description": "broadcom/compiler: use nir io semantics", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9b01598fe5ae5675c1fe7082e39030121acd5221", + "description": "nir/lower_io_to_scalar: update io semantics on per-component inst", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ac5f0ee19cefc639289e8d1ceda483ecc4b59c9e", + "description": "broadcom/compiler: support varyings with struct types", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ebd5b555c153918eaf7cd3a304f45722da1c6e69", + "description": "docs/release-calendar: plan 20.3 release", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f8117f70519e0bb864fe1f324b7376a07fc0fe23", + "description": "intel/fs: Allow constant-propagation into SAMPLEINFO and IMAGE_SIZE", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "438a409290afe10271a5b150c889c2563529b53e", + "description": "docs: update calendar and link releases notes for 20.1.10", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "713b666f2907d24227e4ee9a91eb887d851111ce", + "description": "docs: add release notes for 20.1.10", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0d9216a7cbd11895e3b8c132f658225ccdeef478", + "description": "isl: Allow CCS for 8bpp surfaces with 3+ miplevels", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + 
"sha": "f94ba6b6f56afdc443e22b1cafcd9d3f6f3f007e", + "description": "iris: Add fast-clear restriction for 8bpp surfaces", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1affcea37a6529d99626cd7daa8d7e8d9144dfb4", + "description": "docs: update calendar and link releases notes for 20.2.1", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "dee2fdb3da967ac56f76d09c0153b35eddc648c3", + "description": "docs: add SHA256 sums for 20.2.1", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3c89e7b42293fe4d9f2b0c2cfea04c3d7eef8eaa", + "description": "docs: add release notes for 20.2.1", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bb00a6860eeb5c92db3dc4b98df1f2e568fa162d", + "description": "radv: fix optimizing needed states if some are marked as dynamic", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "68daac28df1b2f50a43740d1905932cfde0ddf1a", + "description": "docs: Document how to replicate a CI build locally.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "0767af3ffece3b951b567a01a2c0da7218a4c70d", + "description": "ci/android: Switch to using the Android NDK.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "ad6189920be9efcef639185b072d2be4e3a37049", + "description": "symbols-check: Add __cxa_guard_* to the list of approved symbols.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"4722491124ea84b62761abe0bb6745da03ab7f6e", + "description": "glsl/tests: Make the tests skip on Android binary execution failures.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f51ce21e4e0bf7efabe58afb4a2cd6b9f98d9505", + "description": "meson: Drop adding -Wl,--gc-sections to project c/cpp arguments.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d5a72319d6b8fa316608b5b93ac421f54716b674", + "description": "aco/isel: Remove now unused VS-related code from create_null_export", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c22c702f352bb6ce8031f63479eb1b590dba6f4a", + "description": "aco/isel: Remove some dead code", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "bf51b11c04fcf9c5f2f86c74ca53ec2ae3fcf4d6", + "description": "aco/isel: Always export position data from VS/NGG", + "nominated": true, + "nomination_type": 0, + "resolution": 1, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f29c81f863c9879a6a87724cbdae1e1818f3f6b4", + "description": "aco: use VOP2 for v_cvt_pkrtz_f16_f32 if possible", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "7240edec2a6554d2cf8f49324d7415bbdb68a97c", + "description": "aco: use VOP2 version of v_cvt_pkrtz_f16_f32 on GFX_6_7_10", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2f125908b358c2423aa34d2673ec04aa546b971a", + "description": "radv,aco: lower_pack_half_2x16", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": 
"dae1e6f7568dcf6eb536098931478f6b5b4af4b2", + "description": "aco: use v_cvt_pkrtz_f16_f32 for pack_half_2x16", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "9185b7c0698e8dc8c1a771493605a195531ac704", + "description": "aco: add validation rules for p_split_vector", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "aec872cda0b0a6cd73a591e28d1beef6cc6f7865", + "description": "aco: use p_split_vector for nir_op_unpack_half_*", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f503699e1068e1670355e3dc14f3d28419c5cbc3", + "description": "nir/opt_algebraic: optimize unpack_half_2x16_split_x(ushr, a, 16)", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "a38a497b86cffa9a67f95cb4a39c77642a2ef0a9", + "description": "aco: use p_create_vector for nir_op_pack_half_2x16", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3c2abd7116b8b5cbd60370ed91943970c158494d", + "description": "aco: expand create_vector more carefully w.r.t. 
subdword operands", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d887eb141bd46f499c83bc624593e4cd2007fd6e", + "description": "aco: propagate SGPRs into VOP1 instructions early.", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "3424e17b9a9beca85c0ef60e195eb544faea8995", + "description": "zink: unify code for emitting named uint-based variable instructions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "20d73a9049f0d3354d6bca4af32613b0466ba0f9", + "description": "aco: adjust an assertion about the wavesize in emit_gfx10_wave64_bpermute()", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "112e66fa090929401b2193e32a905221c3233a49", + "description": "aco: compute the CS workgroup size from the shader NIR info", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e3e8d13ada6c855e2f87be82c33d20ef9d43a875", + "description": "radv: move compiler statistics to ACO", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "97afb2a0a93514df1ba088b222e3e3bdedf8baf7", + "description": "aco: remove unused radv_shader.h includes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "408195ec53055d6ef0f4f735da60af9a8451b9c4", + "description": "aco: remove useless occurences of radv_nir_compiler_options", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8a6f60fc6b850b6553283c4fb8a7348635bb6cb6", + "description": "aco: remove stub lower_wqm() 
prototype", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "23e731fcdba429f48324a7c904e5f655116e4858", + "description": "zink: export PIPE_CAP_MAX*_VARYINGS values", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d50e8554b997338d709e526a8e7c6cd7e970f68c", + "description": "zink: add feature-documentation", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f85488ab827412114f2cb4ff9ee54aafd751454d", + "description": "zink: redo slot mapping again for the last time really I mean it", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4f144dc92ce221e0c7106436a1c2a43da2e9f733", + "description": "zink: don't leak sampler view textures", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "270969b55ef19edd908ae77a86297907e6035dd2", + "description": "zink: explicitly flag fb attachments as being written to in render passes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "8dfb941a4c5f7178f818758175c64bc9093b094f", + "description": "zink: add more explicit fencing for transfer maps", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e3ed624072e28d06b5afa198161afbbe3aadcaf2", + "description": "zink: optimize transfer_map for resources with pending reads/writes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "c6687eef2dcac3399adb26c4deae9b62d04d88cf", + "description": "zink: add a mechanism to track current resource usage in batches", 
+ "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "48b988e35fe1c45e77138d92d162637b1ffc4486", + "description": "radv: fix ignoring the vertex attribute stride if set as dynamic", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "28d02b9d3e7b23146ac8bb28f11c797184638b5c", + "description": "ac,amd/llvm,radv: Initialize structs with {0}", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "b84d1a0c42c5a1973ebc53a49fd941590e540a82", + "description": "radv/aco: disable NGG GS support because it randomly hangs the GPU", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "21422b1ff204bbeac9951f0f146151e5dbf09b06", + "description": "nir/opt_uniform_atomics: remove useless returns", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "775866478807faef626130382c86f9490e13140e", + "description": "radv: Only close local_fd when valid", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "4ca6faa933abb01516ada903335cd66024859eeb", + "description": "util: Hide timespec_passed on Windows", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1026e2ac0f705cffc94a9f1183bcb23272227719", + "description": "radv: Increased const usage", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "1b551857f98b81650b6acc47c5e8f7a4cb4d3192", + "description": "amd/addrlib: Fix warning list for msvc", + "nominated": false, + "nomination_type": null, + "resolution": 4, + 
"master_sha": null, + "because_sha": null + }, + { + "sha": "5abac85177d34bd05f89104c14fb4c7503134c8f", + "description": "intel/fs: Rework scratch handling on Gen9+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "e557af978111178361b9c990fd25302ffca3edf2", + "description": "intel/fs/ra: Use a set to track added spill/fill instructions", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "f650c4c0c6b5d26cabde2b5672c9a6c769cd35e1", + "description": "intel/fs/ra: Sanity-check our IP counts", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "d80d0a6ced99c009986965e80f3b4174d19fa673", + "description": "intel/fs/ra: Store the last non-spill VGRF node", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "2af6528c334264492ad8b6b62c2b10836c7ec568", + "description": "intel/fs/ra: Refactor handling of Gen7 scratch reads", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "74a1843ca0fd85d60320c441944f3b005c50debf", + "description": "intel/fs/ra: Increment spill_offset as part of the emit_spill loop", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "06ebf23283e11c9abb507a523bd0633a82f7960b", + "description": "intel/fs: Add a SCRATCH_HEADER opcode", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { + "sha": "24b64c8408b3758c198aab90618d69dec093b4ad", + "description": "intel/fs: Copy the PTSS from g0 for scratch reads/writes", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": 
null + }, + { + "sha": "f103012fad7af5c74505075cdc71c4280216b229", + "description": "intel/batch_decoder: Don't clame vec4 vs/gs/tcs shaders on Gen11+", + "nominated": false, + "nomination_type": null, + "resolution": 4, + "master_sha": null, + "because_sha": null + }, + { "sha": "e1efc534e6c452e3e606d663864896a654acc185", "description": "v3dv/device: Support loader interface version 3.", "nominated": false, @@ -5935,7 +27886,7 @@ "description": "radeonsi: disable SDMA on gfx6-7 and gfx10.3 to decrease CPU overhead", "nominated": false, "nomination_type": null, - "resolution": 4, + "resolution": 1, "master_sha": null, "because_sha": null }, @@ -23008,7 +44959,7 @@ "description": "freedreno/cffdec: When .mergedregs is set, don't count half regs.", "nominated": false, "nomination_type": null, - "resolution": 4, + "resolution": 1, "master_sha": null, "because_sha": null }, diff -Nru mesa-20.2.1/src/amd/addrlib/src/core/addrobject.cpp mesa-20.2.6/src/amd/addrlib/src/core/addrobject.cpp --- mesa-20.2.1/src/amd/addrlib/src/core/addrobject.cpp 2020-10-14 17:19:10.279849300 +0000 +++ mesa-20.2.6/src/amd/addrlib/src/core/addrobject.cpp 2020-12-16 21:42:03.529110000 +0000 @@ -230,6 +230,7 @@ m_client.callbacks.debugPrint(&debugPrintInput); va_end(ap); + va_end(debugPrintInput.ap); } #endif } diff -Nru mesa-20.2.1/src/amd/common/ac_gpu_info.c mesa-20.2.6/src/amd/common/ac_gpu_info.c --- mesa-20.2.1/src/amd/common/ac_gpu_info.c 2020-10-14 17:19:10.283182600 +0000 +++ mesa-20.2.6/src/amd/common/ac_gpu_info.c 2020-12-16 21:42:03.533110000 +0000 @@ -441,9 +441,9 @@ info->chip_external_rev = amdinfo->chip_external_rev; info->marketing_name = amdgpu_get_marketing_name(dev); info->is_pro_graphics = info->marketing_name && - (!strcmp(info->marketing_name, "Pro") || - !strcmp(info->marketing_name, "PRO") || - !strcmp(info->marketing_name, "Frontier")); + (strstr(info->marketing_name, "Pro") || + strstr(info->marketing_name, "PRO") || + strstr(info->marketing_name, "Frontier")); /* Set 
which chips have dedicated VRAM. */ info->has_dedicated_vram = @@ -521,6 +521,14 @@ if (info->family == CHIP_KAVERI) info->num_render_backends = 2; + /* Guess the number of enabled SEs because the kernel doesn't tell us. */ + if (info->chip_class >= GFX10_3 && info->max_se > 1) { + unsigned num_rbs_per_se = info->num_render_backends / info->max_se; + info->num_se = util_bitcount(amdinfo->enabled_rb_pipes_mask) / num_rbs_per_se; + } else { + info->num_se = info->max_se; + } + info->clock_crystal_freq = amdinfo->gpu_counter_freq; if (!info->clock_crystal_freq) { fprintf(stderr, "amdgpu: clock crystal frequency is 0, timestamps will be wrong\n"); @@ -666,9 +674,9 @@ */ unsigned cu_group = info->chip_class >= GFX10 ? 2 : 1; info->max_good_cu_per_sa = DIV_ROUND_UP(info->num_good_compute_units, - (info->max_se * info->max_sh_per_se * cu_group)) * cu_group; + (info->num_se * info->max_sh_per_se * cu_group)) * cu_group; info->min_good_cu_per_sa = (info->num_good_compute_units / - (info->max_se * info->max_sh_per_se * cu_group)) * cu_group; + (info->num_se * info->max_sh_per_se * cu_group)) * cu_group; memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode, sizeof(amdinfo->gb_tile_mode)); @@ -962,6 +970,7 @@ printf(" max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa); printf(" min_good_cu_per_sa = %i\n", info->min_good_cu_per_sa); printf(" max_se = %i\n", info->max_se); + printf(" num_se = %i\n", info->num_se); printf(" max_sh_per_se = %i\n", info->max_sh_per_se); printf(" max_wave64_per_simd = %i\n", info->max_wave64_per_simd); printf(" num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd); @@ -1289,7 +1298,7 @@ if (info->chip_class >= GFX7) { unsigned num_cu_per_se = info->num_good_compute_units / - info->max_se; + info->num_se; /* Force even distribution on all SIMDs in CU if the workgroup * size is 64. 
This has shown some good improvements if # of CUs diff -Nru mesa-20.2.1/src/amd/common/ac_gpu_info.h mesa-20.2.6/src/amd/common/ac_gpu_info.h --- mesa-20.2.1/src/amd/common/ac_gpu_info.h 2020-10-14 17:19:10.283182600 +0000 +++ mesa-20.2.6/src/amd/common/ac_gpu_info.h 2020-12-16 21:42:03.533110000 +0000 @@ -163,7 +163,8 @@ uint32_t num_good_compute_units; uint32_t max_good_cu_per_sa; uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */ - uint32_t max_se; /* shader engines */ + uint32_t max_se; /* number of shader engines incl. disabled ones */ + uint32_t num_se; /* number of enabled shader engines */ uint32_t max_sh_per_se; /* shader arrays per shader engine */ uint32_t max_wave64_per_simd; uint32_t num_physical_sgprs_per_simd; diff -Nru mesa-20.2.1/src/amd/compiler/aco_instruction_selection.cpp mesa-20.2.6/src/amd/compiler/aco_instruction_selection.cpp --- mesa-20.2.1/src/amd/compiler/aco_instruction_selection.cpp 2020-10-14 17:19:10.286516000 +0000 +++ mesa-20.2.6/src/amd/compiler/aco_instruction_selection.cpp 2020-12-16 21:42:03.537109900 +0000 @@ -2001,7 +2001,7 @@ bld.vop2(aco_opcode::v_xor_b32, Definition(dst), Operand(0x80000000u), as_vgpr(ctx, src)); } else if (dst.regClass() == v2) { if (ctx->block->fp_mode.must_flush_denorms16_64) - src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand(0x3FF0000000000000lu), as_vgpr(ctx, src)); + src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand(UINT64_C(0x3FF0000000000000)), as_vgpr(ctx, src)); Temp upper = bld.tmp(v1), lower = bld.tmp(v1); bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); upper = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand(0x80000000u), upper); @@ -2025,7 +2025,7 @@ bld.vop2(aco_opcode::v_and_b32, Definition(dst), Operand(0x7FFFFFFFu), as_vgpr(ctx, src)); } else if (dst.regClass() == v2) { if (ctx->block->fp_mode.must_flush_denorms16_64) - src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand(0x3FF0000000000000lu), 
as_vgpr(ctx, src)); + src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand(UINT64_C(0x3FF0000000000000)), as_vgpr(ctx, src)); Temp upper = bld.tmp(v1), lower = bld.tmp(v1); bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); upper = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x7FFFFFFFu), upper); @@ -6194,7 +6194,7 @@ Temp size = emit_extract_vector(ctx, desc, 2, s1); Temp size_div3 = bld.vop3(aco_opcode::v_mul_hi_u32, bld.def(v1), bld.copy(bld.def(v1), Operand(0xaaaaaaabu)), size); - size_div3 = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.as_uniform(size_div3), Operand(1u)); + size_div3 = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), bld.as_uniform(size_div3), Operand(1u)); Temp stride = emit_extract_vector(ctx, desc, 1, s1); stride = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), stride, Operand((5u << 16) | 16u)); @@ -8514,9 +8514,7 @@ has_bias = true; break; case nir_tex_src_lod: { - nir_const_value *val = nir_src_as_const_value(instr->src[i].src); - - if (val && val->f32 <= 0.0) { + if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0) { level_zero = true; } else { lod = get_ssa_temp(ctx, instr->src[i].src.ssa); @@ -9433,7 +9431,7 @@ continue; } - if (block.kind & block_kind_continue) { + if ((block.kind & block_kind_continue) && block.index != last) { vals[idx - first] = header_phi->operands[next_pred]; next_pred++; continue; @@ -10083,6 +10081,11 @@ ctx->outputs.temps[VARYING_SLOT_LAYER * 4u] = as_vgpr(ctx, get_arg(ctx, ctx->args->ac.view_index)); } + /* Hardware requires position data to always be exported, even if the + * application did not write gl_Position. 
+ */ + ctx->outputs.mask[VARYING_SLOT_POS] = 0xf; + /* the order these position exports are created is important */ int next_pos = 0; bool exported_pos = export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos); diff -Nru mesa-20.2.1/src/amd/compiler/aco_lower_to_hw_instr.cpp mesa-20.2.6/src/amd/compiler/aco_lower_to_hw_instr.cpp --- mesa-20.2.1/src/amd/compiler/aco_lower_to_hw_instr.cpp 2020-10-14 17:19:10.289849300 +0000 +++ mesa-20.2.6/src/amd/compiler/aco_lower_to_hw_instr.cpp 2020-12-16 21:42:03.539110000 +0000 @@ -1532,31 +1532,28 @@ /* if this is self-intersecting, we have to split it because * self-intersecting swaps don't make sense */ - PhysReg lower = swap.def.physReg(); - PhysReg higher = swap.op.physReg(); - if (lower.reg_b > higher.reg_b) - std::swap(lower, higher); - if (higher.reg_b - lower.reg_b < (int)swap.bytes) { - unsigned offset = higher.reg_b - lower.reg_b; + PhysReg src = swap.op.physReg(), dst = swap.def.physReg(); + if (abs((int)src.reg_b - (int)dst.reg_b) < (int)swap.bytes) { + unsigned offset = abs((int)src.reg_b - (int)dst.reg_b); RegType type = swap.def.regClass().type(); copy_operation middle; - lower.reg_b += offset; - higher.reg_b += offset; + src.reg_b += offset; + dst.reg_b += offset; middle.bytes = swap.bytes - offset * 2; memcpy(middle.uses, swap.uses + offset, middle.bytes); - middle.op = Operand(lower, RegClass::get(type, middle.bytes)); - middle.def = Definition(higher, RegClass::get(type, middle.bytes)); - copy_map[higher] = middle; + middle.op = Operand(src, RegClass::get(type, middle.bytes)); + middle.def = Definition(dst, RegClass::get(type, middle.bytes)); + copy_map[dst] = middle; copy_operation end; - lower.reg_b += middle.bytes; - higher.reg_b += middle.bytes; + src.reg_b += middle.bytes; + dst.reg_b += middle.bytes; end.bytes = swap.bytes - (offset + middle.bytes); memcpy(end.uses, swap.uses + offset + middle.bytes, end.bytes); - end.op = Operand(lower, RegClass::get(type, end.bytes)); - end.def = Definition(higher, 
RegClass::get(type, end.bytes)); - copy_map[higher] = end; + end.op = Operand(src, RegClass::get(type, end.bytes)); + end.def = Definition(dst, RegClass::get(type, end.bytes)); + copy_map[dst] = end; memset(swap.uses + offset, 0, swap.bytes - offset); swap.bytes = offset; diff -Nru mesa-20.2.1/src/amd/compiler/aco_optimizer.cpp mesa-20.2.6/src/amd/compiler/aco_optimizer.cpp --- mesa-20.2.1/src/amd/compiler/aco_optimizer.cpp 2020-10-14 17:19:10.289849300 +0000 +++ mesa-20.2.6/src/amd/compiler/aco_optimizer.cpp 2020-12-16 21:42:03.540109900 +0000 @@ -1232,7 +1232,7 @@ ctx.info[instr->operands[i].tempId()].set_omod2(instr->definitions[0].getTemp()); } else if (instr->operands[!i].constantValue() == (fp16 ? 0x4400 : 0x40800000)) { /* 4.0 */ ctx.info[instr->operands[i].tempId()].set_omod4(instr->definitions[0].getTemp()); - } else if (instr->operands[!i].constantValue() == (fp16 ? 0xb800 : 0x3f000000)) { /* 0.5 */ + } else if (instr->operands[!i].constantValue() == (fp16 ? 0x3800 : 0x3f000000)) { /* 0.5 */ ctx.info[instr->operands[i].tempId()].set_omod5(instr->definitions[0].getTemp()); } else if (instr->operands[!i].constantValue() == (fp16 ? 0x3c00 : 0x3f800000) && !(fp16 ? 
block.fp_mode.must_flush_denorms16_64 : block.fp_mode.must_flush_denorms32)) { /* 1.0 */ @@ -1728,6 +1728,31 @@ return true; } +bool is_operand_constant(opt_ctx &ctx, Operand op, unsigned bit_size, uint64_t *value) +{ + if (op.isConstant()) { + *value = op.constantValue64(); + return true; + } else if (op.isTemp()) { + unsigned id = original_temp_id(ctx, op.getTemp()); + if (!ctx.info[id].is_constant_or_literal(bit_size)) + return false; + *value = get_constant_op(ctx, ctx.info[id], bit_size).constantValue64(); + return true; + } + return false; +} + +bool is_constant_nan(uint64_t value, unsigned bit_size) +{ + if (bit_size == 16) + return ((value >> 10) & 0x1f) == 0x1f && (value & 0x3ff); + else if (bit_size == 32) + return ((value >> 23) & 0xff) == 0xff && (value & 0x7fffff); + else + return ((value >> 52) & 0x7ff) == 0x7ff && (value & 0xfffffffffffff); +} + /* s_or_b64(v_cmp_neq_f32(a, a), cmp(a, #b)) and b is not NaN -> get_unordered(cmp)(a, b) * s_and_b64(v_cmp_eq_f32(a, a), cmp(a, #b)) and b is not NaN -> get_ordered(cmp)(a, b) */ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr& instr) @@ -1751,7 +1776,8 @@ else if (get_f32_cmp(nan_test->opcode) != expected_nan_test) return false; - if (!is_cmp(cmp->opcode) || get_cmp_bitsize(cmp->opcode) != get_cmp_bitsize(nan_test->opcode)) + unsigned bit_size = get_cmp_bitsize(cmp->opcode); + if (!is_cmp(cmp->opcode) || get_cmp_bitsize(nan_test->opcode) != bit_size) return false; if (!nan_test->operands[0].isTemp() || !nan_test->operands[1].isTemp()) @@ -1780,22 +1806,10 @@ if (constant_operand == -1) return false; - uint32_t constant; - if (cmp->operands[constant_operand].isConstant()) { - constant = cmp->operands[constant_operand].constantValue(); - } else if (cmp->operands[constant_operand].isTemp()) { - Temp tmp = cmp->operands[constant_operand].getTemp(); - unsigned id = original_temp_id(ctx, tmp); - if (!ctx.info[id].is_constant_or_literal(32)) - return false; - constant = ctx.info[id].val; - } else { 
+ uint64_t constant_value; + if (!is_operand_constant(ctx, cmp->operands[constant_operand], bit_size, &constant_value)) return false; - } - - float constantf; - memcpy(&constantf, &constant, 4); - if (isnan(constantf)) + if (is_constant_nan(constant_value, bit_size)) return false; if (cmp->operands[0].isTemp()) @@ -1886,7 +1900,8 @@ Instruction* op1_instr, bool swap, const char *shuffle_str, Operand operands[3], bool neg[3], bool abs[3], uint8_t *opsel, bool *op1_clamp, uint8_t *op1_omod, - bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel) + bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel, + bool *precise) { /* checks */ if (op1_instr->opcode != op1) @@ -1924,6 +1939,9 @@ else if (op1_vop3 && op1_vop3->opsel & (1 << swap)) return false; + *precise = op1_instr->definitions[0].isPrecise() || + op2_instr->definitions[0].isPrecise(); + int shuffle[3]; shuffle[shuffle_str[0] - '0'] = 0; shuffle[shuffle_str[1] - '0'] = 1; @@ -1979,12 +1997,12 @@ continue; Operand operands[3]; - bool neg[3], abs[3], clamp; + bool neg[3], abs[3], clamp, precise; uint8_t opsel = 0, omod = 0; if (match_op3_for_vop3(ctx, instr->opcode, op2, instr.get(), swap, shuffle, operands, neg, abs, &opsel, - &clamp, &omod, NULL, NULL, NULL)) { + &clamp, &omod, NULL, NULL, NULL, &precise)) { ctx.uses[instr->operands[swap].tempId()]--; create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod); if (omod_clamp & label_omod_success) @@ -2005,21 +2023,21 @@ uint64_t omod_clamp = ctx.info[instr->definitions[0].tempId()].label & (label_omod_success | label_clamp_success); - /* min(-max(a, b), c) -> min3(-a, -b, c) * - * max(-min(a, b), c) -> max3(-a, -b, c) */ + /* min(-max(a, b), c) -> min3(c, -a, -b) * + * max(-min(a, b), c) -> max3(c, -a, -b) */ for (unsigned swap = 0; swap < 2; swap++) { Operand operands[3]; - bool neg[3], abs[3], clamp; + bool neg[3], abs[3], clamp, precise; uint8_t opsel = 0, omod = 0; bool inbetween_neg; if (match_op3_for_vop3(ctx, 
instr->opcode, opposite, instr.get(), swap, "012", operands, neg, abs, &opsel, - &clamp, &omod, &inbetween_neg, NULL, NULL) && + &clamp, &omod, &inbetween_neg, NULL, NULL, &precise) && inbetween_neg) { ctx.uses[instr->operands[swap].tempId()]--; - neg[1] = true; - neg[2] = true; + neg[1] = !neg[1]; + neg[2] = !neg[2]; create_vop3_for_op3(ctx, minmax3, instr, operands, neg, abs, opsel, clamp, omod); if (omod_clamp & label_omod_success) ctx.info[instr->definitions[0].tempId()].set_omod_success(instr.get()); @@ -2261,11 +2279,17 @@ for (unsigned swap = 0; swap < 2; swap++) { Operand operands[3]; - bool neg[3], abs[3], clamp; + bool neg[3], abs[3], clamp, precise; uint8_t opsel = 0, omod = 0; if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap, "012", operands, neg, abs, &opsel, - &clamp, &omod, NULL, NULL, NULL)) { + &clamp, &omod, NULL, NULL, NULL, &precise)) { + /* max(min(src, upper), lower) returns upper if src is NaN, but + * med3(src, lower, upper) returns lower. + */ + if (precise && instr->opcode != min) + continue; + int const0_idx = -1, const1_idx = -1; uint32_t const0 = 0, const1 = 0; for (int i = 0; i < 3; i++) { @@ -2515,7 +2539,11 @@ /* apply omod / clamp modifiers if the def is used only once and the instruction can have modifiers */ if (!instr->definitions.empty() && ctx.uses[instr->definitions[0].tempId()] == 1 && can_use_VOP3(ctx, instr) && instr_info.can_use_output_modifiers[(int)instr->opcode]) { - bool can_use_omod = (instr->definitions[0].bytes() == 4 ? 
block.fp_mode.denorm32 : block.fp_mode.denorm16_64) == 0; + bool can_use_omod; + if (instr->definitions[0].bytes() == 4) + can_use_omod = block.fp_mode.denorm32 == 0 && !block.fp_mode.preserve_signed_zero_inf_nan32; + else + can_use_omod = block.fp_mode.denorm16_64 == 0 && !block.fp_mode.preserve_signed_zero_inf_nan16_64; ssa_info& def_info = ctx.info[instr->definitions[0].tempId()]; if (can_use_omod && def_info.is_omod2() && ctx.uses[def_info.temp.id()]) { to_VOP3(ctx, instr); @@ -2759,7 +2787,7 @@ else combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32, "012", 1 | 2); } else if (instr->opcode == aco_opcode::v_add_u32) { if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ; - else if (ctx.program->chip_class >= GFX9) { + else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) { if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ; else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ; else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32, "012", 1 | 2)) ; @@ -2986,7 +3014,9 @@ /* Mark SCC needed, so the uniform boolean transformation won't swap the definitions when it isn't beneficial */ if (instr->format == Format::PSEUDO_BRANCH && instr->operands.size() && - instr->operands[0].isTemp()) { + instr->operands[0].isTemp() && + instr->operands[0].isFixed() && + instr->operands[0].physReg() == scc) { ctx.info[instr->operands[0].tempId()].set_scc_needed(); return; } else if ((instr->opcode == aco_opcode::s_cselect_b64 || diff -Nru mesa-20.2.1/src/amd/compiler/aco_register_allocation.cpp mesa-20.2.6/src/amd/compiler/aco_register_allocation.cpp --- mesa-20.2.1/src/amd/compiler/aco_register_allocation.cpp 2020-10-14 17:19:10.289849300 +0000 +++ mesa-20.2.6/src/amd/compiler/aco_register_allocation.cpp 2020-12-16 21:42:03.540109900 +0000 @@ -38,8 +38,10 @@ namespace aco { namespace { 
+struct ra_ctx; + unsigned get_subdword_operand_stride(chip_class chip, const aco_ptr& instr, unsigned idx, RegClass rc); -void add_subdword_operand(chip_class chip, aco_ptr& instr, unsigned idx, unsigned byte, RegClass rc); +void add_subdword_operand(ra_ctx& ctx, aco_ptr& instr, unsigned idx, unsigned byte, RegClass rc); std::pair get_subdword_definition_info(Program *program, const aco_ptr& instr, RegClass rc); void add_subdword_definition(Program *program, aco_ptr& instr, unsigned idx, PhysReg reg, bool is_partial); @@ -352,8 +354,22 @@ return 4; } -void add_subdword_operand(chip_class chip, aco_ptr& instr, unsigned idx, unsigned byte, RegClass rc) +void update_phi_map(ra_ctx& ctx, Instruction *old, Instruction *instr) +{ + for (Operand& op : instr->operands) { + if (!op.isTemp()) + continue; + std::unordered_map::iterator phi = ctx.phi_map.find(op.tempId()); + if (phi != ctx.phi_map.end()) { + phi->second.uses.erase(old); + phi->second.uses.emplace(instr); + } + } +} + +void add_subdword_operand(ra_ctx& ctx, aco_ptr& instr, unsigned idx, unsigned byte, RegClass rc) { + chip_class chip = ctx.program->chip_class; if (instr->format == Format::PSEUDO || byte == 0) return; @@ -376,7 +392,9 @@ } return; } else if (can_use_SDWA(chip, instr)) { - convert_to_SDWA(chip, instr); + aco_ptr tmp = convert_to_SDWA(chip, instr); + if (tmp) + update_phi_map(ctx, tmp.get(), instr.get()); return; } else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) { VOP3A_instruction *vop3 = static_cast(instr.get()); @@ -1331,7 +1349,7 @@ PhysReg reg; reg.reg_b = j * 4; unsigned bytes_left = bytes - (j - reg_lo) * 4; - for (unsigned k = 0; k < MIN2(bytes_left, 4); k++, reg.reg_b++) + for (unsigned byte_idx = 0; byte_idx < MIN2(bytes_left, 4); byte_idx++, reg.reg_b++) k += reg_file.test(reg, 1); } else { k += 4; @@ -2183,14 +2201,10 @@ !register_file.test(reg, definition->bytes())) definition->setFixed(reg); } else if (instr->opcode == aco_opcode::p_extract_vector) { - 
PhysReg reg; - if (instr->operands[0].isKillBeforeDef() && - instr->operands[0].getTemp().type() == definition->getTemp().type()) { - reg = instr->operands[0].physReg(); - reg.reg_b += definition->bytes() * instr->operands[1].constantValue(); - assert(!register_file.test(reg, definition->bytes())); + PhysReg reg = instr->operands[0].physReg(); + reg.reg_b += definition->bytes() * instr->operands[1].constantValue(); + if (get_reg_specified(ctx, register_file, definition->regClass(), parallelcopy, instr, reg)) definition->setFixed(reg); - } } else if (instr->opcode == aco_opcode::p_create_vector) { PhysReg reg = get_reg_create_vector(ctx, register_file, definition->getTemp(), parallelcopy, instr); @@ -2233,7 +2247,7 @@ if (op.isTemp() && op.isFirstKill() && op.isLateKill()) register_file.clear(op); if (op.isTemp() && op.physReg().byte() != 0) - add_subdword_operand(program->chip_class, instr, i, op.physReg().byte(), op.regClass()); + add_subdword_operand(ctx, instr, i, op.physReg().byte(), op.regClass()); } /* emit parallelcopy */ @@ -2366,19 +2380,9 @@ aco_ptr tmp = std::move(instr); Format format = asVOP3(tmp->format); instr.reset(create_instruction(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size())); - for (unsigned i = 0; i < instr->operands.size(); i++) { - Operand& operand = tmp->operands[i]; - instr->operands[i] = operand; - /* keep phi_map up to date */ - if (operand.isTemp()) { - std::unordered_map::iterator phi = ctx.phi_map.find(operand.tempId()); - if (phi != ctx.phi_map.end()) { - phi->second.uses.erase(tmp.get()); - phi->second.uses.emplace(instr.get()); - } - } - } + std::copy(tmp->operands.begin(), tmp->operands.end(), instr->operands.begin()); std::copy(tmp->definitions.begin(), tmp->definitions.end(), instr->definitions.begin()); + update_phi_map(ctx, tmp.get(), instr.get()); } instructions.emplace_back(std::move(*it)); diff -Nru mesa-20.2.1/src/amd/compiler/aco_scheduler.cpp mesa-20.2.6/src/amd/compiler/aco_scheduler.cpp --- 
mesa-20.2.1/src/amd/compiler/aco_scheduler.cpp 2020-10-14 17:19:10.289849300 +0000 +++ mesa-20.2.6/src/amd/compiler/aco_scheduler.cpp 2020-12-16 21:42:03.541110000 +0000 @@ -910,9 +910,9 @@ else ctx.num_waves = 7; ctx.num_waves = std::max(ctx.num_waves, program->min_waves); - ctx.num_waves = std::min(ctx.num_waves, program->max_waves); + ctx.num_waves = std::min(ctx.num_waves, program->num_waves); - assert(ctx.num_waves > 0 && ctx.num_waves <= program->num_waves); + assert(ctx.num_waves > 0); ctx.mv.max_registers = { int16_t(get_addr_vgpr_from_waves(program, ctx.num_waves) - 2), int16_t(get_addr_sgpr_from_waves(program, ctx.num_waves))}; diff -Nru mesa-20.2.1/src/amd/compiler/aco_spill.cpp mesa-20.2.6/src/amd/compiler/aco_spill.cpp --- mesa-20.2.1/src/amd/compiler/aco_spill.cpp 2020-10-14 17:19:10.293182800 +0000 +++ mesa-20.2.6/src/amd/compiler/aco_spill.cpp 2020-12-16 21:42:03.541110000 +0000 @@ -831,6 +831,11 @@ assert(phi->operands[i].isTemp() && phi->operands[i].isKill()); Temp var = phi->operands[i].getTemp(); + std::map::iterator rename_it = ctx.renames[pred_idx].find(var); + /* prevent the definining instruction from being DCE'd if it could be rematerialized */ + if (rename_it == ctx.renames[preds[i]].end() && ctx.remat.count(var)) + ctx.remat_used[ctx.remat[var].instr] = true; + /* build interferences between the phi def and all spilled variables at the predecessor blocks */ for (std::pair pair : ctx.spills_exit[pred_idx]) { if (var == pair.first) @@ -847,7 +852,6 @@ } /* rename if necessary */ - std::map::iterator rename_it = ctx.renames[pred_idx].find(var); if (rename_it != ctx.renames[pred_idx].end()) { var = rename_it->second; ctx.renames[pred_idx].erase(rename_it); @@ -938,6 +942,9 @@ std::map::iterator it = ctx.renames[pred_idx].find(phi->operands[i].getTemp()); if (it != ctx.renames[pred_idx].end()) phi->operands[i].setTemp(it->second); + /* prevent the definining instruction from being DCE'd if it could be rematerialized */ + else if 
(ctx.remat.count(phi->operands[i].getTemp())) + ctx.remat_used[ctx.remat[phi->operands[i].getTemp()].instr] = true; continue; } @@ -1027,12 +1034,16 @@ rename = {ctx.program->allocateId(), pair.first.regClass()}; for (unsigned i = 0; i < phi->operands.size(); i++) { Temp tmp; - if (ctx.renames[preds[i]].find(pair.first) != ctx.renames[preds[i]].end()) + if (ctx.renames[preds[i]].find(pair.first) != ctx.renames[preds[i]].end()) { tmp = ctx.renames[preds[i]][pair.first]; - else if (preds[i] >= block_idx) + } else if (preds[i] >= block_idx) { tmp = rename; - else + } else { tmp = pair.first; + /* prevent the definining instruction from being DCE'd if it could be rematerialized */ + if (ctx.remat.count(tmp)) + ctx.remat_used[ctx.remat[tmp].instr] = true; + } phi->operands[i] = Operand(tmp); } phi->definitions[0] = Definition(rename); @@ -1075,14 +1086,7 @@ /* phis are handled separetely */ while (block->instructions[idx]->opcode == aco_opcode::p_phi || block->instructions[idx]->opcode == aco_opcode::p_linear_phi) { - aco_ptr& instr = block->instructions[idx]; - for (const Operand& op : instr->operands) { - /* prevent it's definining instruction from being DCE'd if it could be rematerialized */ - if (op.isTemp() && ctx.remat.count(op.getTemp())) - ctx.remat_used[ctx.remat[op.getTemp()].instr] = true; - } - instructions.emplace_back(std::move(instr)); - idx++; + instructions.emplace_back(std::move(block->instructions[idx++])); } if (block->register_demand.exceeds(ctx.target_pressure)) @@ -1102,7 +1106,7 @@ if (ctx.renames[block_idx].find(op.getTemp()) != ctx.renames[block_idx].end()) op.setTemp(ctx.renames[block_idx][op.getTemp()]); /* prevent it's definining instruction from being DCE'd if it could be rematerialized */ - if (ctx.remat.count(op.getTemp())) + else if (ctx.remat.count(op.getTemp())) ctx.remat_used[ctx.remat[op.getTemp()].instr] = true; continue; } @@ -1246,16 +1250,6 @@ /* add coupling code to all loop header predecessors */ add_coupling_code(ctx, 
loop_header, loop_header->index); - /* update remat_used for phis added in add_coupling_code() */ - for (aco_ptr& instr : loop_header->instructions) { - if (!is_phi(instr)) - break; - for (const Operand& op : instr->operands) { - if (op.isTemp() && ctx.remat.count(op.getTemp())) - ctx.remat_used[ctx.remat[op.getTemp()].instr] = true; - } - } - /* propagate new renames through loop: i.e. repair the SSA */ renames.swap(ctx.renames[loop_header->index]); for (std::pair rename : renames) { diff -Nru mesa-20.2.1/src/amd/compiler/tests/test_optimizer.cpp mesa-20.2.6/src/amd/compiler/tests/test_optimizer.cpp --- mesa-20.2.1/src/amd/compiler/tests/test_optimizer.cpp 2020-10-14 17:19:10.293182800 +0000 +++ mesa-20.2.6/src/amd/compiler/tests/test_optimizer.cpp 2020-12-16 21:42:03.542110000 +0000 @@ -80,3 +80,155 @@ finish_opt_test(); } END_TEST + +BEGIN_TEST(optimize.clamp) + //>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm + if (!setup_cs("v1 v1 v1", GFX9)) + return; + + //! v1: %res0 = v_med3_f32 4.0, 0, %a + //! p_unit_test 0, %res0 + writeout(0, bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), + bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), inputs[0]))); + + //! v1: %res1 = v_med3_f32 0, 4.0, %a + //! p_unit_test 1, %res1 + writeout(1, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), + bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), inputs[0]))); + + /* correct NaN behaviour with precise */ + + //! v1: %res2 = v_med3_f32 4.0, 0, %a + //! p_unit_test 2, %res2 + Builder::Result max = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), inputs[0]); + max.def(0).setPrecise(true); + Builder::Result min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), max); + max.def(0).setPrecise(true); + writeout(2, min); + + //! v1: (precise)%res3_tmp = v_min_f32 4.0, %a + //! v1: %res3 = v_max_f32 0, %res3_tmp + //! 
p_unit_test 3, %res3 + min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), inputs[0]); + min.def(0).setPrecise(true); + writeout(3, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), min)); + + finish_opt_test(); +END_TEST + +BEGIN_TEST(optimize.const_comparison_ordering) + //>> v1: %a, v1: %b, v2: %c, v1: %d, s2: %_:exec = p_startpgm + if (!setup_cs("v1 v1 v2 v1", GFX9)) + return; + + /* optimize to unordered comparison */ + //! s2: %res0 = v_cmp_nge_f32 4.0, %a + //! p_unit_test 0, %res0 + writeout(0, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]))); + + //! s2: %res1 = v_cmp_nge_f32 4.0, %a + //! p_unit_test 1, %res1 + writeout(1, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]))); + + //! s2: %res2 = v_cmp_nge_f32 0x40a00000, %a + //! p_unit_test 2, %res2 + writeout(2, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), bld.copy(bld.def(v1), Operand(0x40a00000u)), inputs[0]))); + + /* optimize to ordered comparison */ + //! s2: %res3 = v_cmp_lt_f32 4.0, %a + //! p_unit_test 3, %res3 + writeout(3, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]))); + + //! s2: %res4 = v_cmp_lt_f32 4.0, %a + //! 
p_unit_test 4, %res4 + writeout(4, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]))); + + //! s2: %res5 = v_cmp_lt_f32 0x40a00000, %a + //! p_unit_test 5, %res5 + writeout(5, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), bld.copy(bld.def(v1), Operand(0x40a00000u)), inputs[0]))); + + /* NaN */ + uint16_t nan16 = 0x7e00; + uint32_t nan32 = 0x7fc00000; + + //! s2: %tmp6_0 = v_cmp_lt_f16 0x7e00, %a + //! s2: %tmp6_1 = v_cmp_neq_f16 %a, %a + //! s2: %res6, s1: %_:scc = s_or_b64 %tmp6_1, %tmp6_0 + //! p_unit_test 6, %res6 + writeout(6, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand(nan16), inputs[0]))); + + //! s2: %tmp7_0 = v_cmp_lt_f32 0x7fc00000, %a + //! s2: %tmp7_1 = v_cmp_neq_f32 %a, %a + //! s2: %res7, s1: %_:scc = s_or_b64 %tmp7_1, %tmp7_0 + //! p_unit_test 7, %res7 + writeout(7, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(nan32), inputs[0]))); + + finish_opt_test(); +END_TEST + +BEGIN_TEST(optimize.add3) + //>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm + if (!setup_cs("v1 v1 v1", GFX9)) + return; + + //! v1: %res0 = v_add3_u32 %a, %b, %c + //! p_unit_test 0, %res0 + Builder::Result tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + writeout(0, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); + + //! v1: %tmp1 = v_add_u32 %b, %c clamp + //! 
v1: %res1 = v_add_u32 %a, %tmp1 + //! p_unit_test 1, %res1 + tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + static_cast(tmp.instr)->clamp = true; + writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); + + //! v1: %tmp2 = v_add_u32 %b, %c + //! v1: %res2 = v_add_u32 %a, %tmp2 clamp + //! p_unit_test 2, %res2 + tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); + tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp); + static_cast(tmp.instr)->clamp = true; + writeout(2, tmp); + + finish_opt_test(); +END_TEST + +BEGIN_TEST(optimize.minmax) + for (unsigned i = GFX8; i <= GFX10; i++) { + //>> v1: %a, s2: %_:exec = p_startpgm + if (!setup_cs("v1", (chip_class)i)) + continue; + + //! v1: %res0 = v_max3_f32 0, -0, %a + //! p_unit_test 0, %res0 + Temp xor0 = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand(0x80000000u), Operand(inputs[0])); + Temp min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0u), xor0); + Temp xor1 = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand(0x80000000u), min); + writeout(0, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), xor1)); + + //! v1: %res1 = v_max3_f32 0, -0, -%a + //! 
p_unit_test 1, %res1 + min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0u), Operand(inputs[0])); + xor1 = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand(0x80000000u), min); + writeout(1, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), xor1)); + + finish_opt_test(); + } +END_TEST diff -Nru mesa-20.2.1/src/amd/llvm/ac_llvm_build.c mesa-20.2.6/src/amd/llvm/ac_llvm_build.c --- mesa-20.2.1/src/amd/llvm/ac_llvm_build.c 2020-10-14 17:19:10.293182800 +0000 +++ mesa-20.2.6/src/amd/llvm/ac_llvm_build.c 2020-12-16 21:42:03.543110000 +0000 @@ -25,324 +25,294 @@ /* based on pieces from si_pipe.c and radeon_llvm_emit.c */ #include "ac_llvm_build.h" -#include -#include - -#include "c11/threads.h" - -#include -#include - +#include "ac_exp_param.h" #include "ac_llvm_util.h" #include "ac_shader_util.h" -#include "ac_exp_param.h" +#include "c11/threads.h" +#include "shader_enums.h" +#include "sid.h" #include "util/bitscan.h" #include "util/macros.h" #include "util/u_atomic.h" #include "util/u_math.h" -#include "sid.h" +#include +#include -#include "shader_enums.h" +#include +#include #define AC_LLVM_INITIAL_CF_DEPTH 4 /* Data for if/else/endif and bgnloop/endloop control flow structures. */ struct ac_llvm_flow { - /* Loop exit or next part of if/else/endif. */ - LLVMBasicBlockRef next_block; - LLVMBasicBlockRef loop_entry_block; + /* Loop exit or next part of if/else/endif. */ + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef loop_entry_block; }; /* Initialize module-independent parts of the context. * * The caller is responsible for initializing ctx::module and ctx::builder. 
*/ -void -ac_llvm_context_init(struct ac_llvm_context *ctx, - struct ac_llvm_compiler *compiler, - enum chip_class chip_class, enum radeon_family family, - enum ac_float_mode float_mode, unsigned wave_size, - unsigned ballot_mask_bits) -{ - ctx->context = LLVMContextCreate(); - - ctx->chip_class = chip_class; - ctx->family = family; - ctx->wave_size = wave_size; - ctx->ballot_mask_bits = ballot_mask_bits; - ctx->float_mode = float_mode; - ctx->module = ac_create_module(wave_size == 32 ? compiler->tm_wave32 - : compiler->tm, - ctx->context); - ctx->builder = ac_create_builder(ctx->context, float_mode); - - ctx->voidt = LLVMVoidTypeInContext(ctx->context); - ctx->i1 = LLVMInt1TypeInContext(ctx->context); - ctx->i8 = LLVMInt8TypeInContext(ctx->context); - ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); - ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); - ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); - ctx->i128 = LLVMIntTypeInContext(ctx->context, 128); - ctx->intptr = ctx->i32; - ctx->f16 = LLVMHalfTypeInContext(ctx->context); - ctx->f32 = LLVMFloatTypeInContext(ctx->context); - ctx->f64 = LLVMDoubleTypeInContext(ctx->context); - ctx->v2i16 = LLVMVectorType(ctx->i16, 2); - ctx->v4i16 = LLVMVectorType(ctx->i16, 4); - ctx->v2f16 = LLVMVectorType(ctx->f16, 2); - ctx->v4f16 = LLVMVectorType(ctx->f16, 4); - ctx->v2i32 = LLVMVectorType(ctx->i32, 2); - ctx->v3i32 = LLVMVectorType(ctx->i32, 3); - ctx->v4i32 = LLVMVectorType(ctx->i32, 4); - ctx->v2f32 = LLVMVectorType(ctx->f32, 2); - ctx->v3f32 = LLVMVectorType(ctx->f32, 3); - ctx->v4f32 = LLVMVectorType(ctx->f32, 4); - ctx->v8i32 = LLVMVectorType(ctx->i32, 8); - ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size); - ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits); - - ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false); - ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false); - ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false); - ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false); - ctx->i32_0 = 
LLVMConstInt(ctx->i32, 0, false); - ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false); - ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false); - ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false); - ctx->i128_0 = LLVMConstInt(ctx->i128, 0, false); - ctx->i128_1 = LLVMConstInt(ctx->i128, 1, false); - ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0); - ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0); - ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0); - ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0); - ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0); - ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0); - - ctx->i1false = LLVMConstInt(ctx->i1, 0, false); - ctx->i1true = LLVMConstInt(ctx->i1, 1, false); - - ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context, - "range", 5); - - ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context, - "invariant.load", 14); - - ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, - "amdgpu.uniform", 14); - - ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); - ctx->flow = calloc(1, sizeof(*ctx->flow)); -} - -void -ac_llvm_context_dispose(struct ac_llvm_context *ctx) -{ - free(ctx->flow->stack); - free(ctx->flow); - ctx->flow = NULL; -} - -int -ac_get_llvm_num_components(LLVMValueRef value) -{ - LLVMTypeRef type = LLVMTypeOf(value); - unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind - ? 
LLVMGetVectorSize(type) - : 1; - return num_components; -} - -LLVMValueRef -ac_llvm_extract_elem(struct ac_llvm_context *ac, - LLVMValueRef value, - int index) -{ - if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) { - assert(index == 0); - return value; - } - - return LLVMBuildExtractElement(ac->builder, value, - LLVMConstInt(ac->i32, index, false), ""); -} - -int -ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type) -{ - if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) - type = LLVMGetElementType(type); - - if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind) - return LLVMGetIntTypeWidth(type); - - if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) { - if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_LDS) - return 32; - } - - if (type == ctx->f16) - return 16; - if (type == ctx->f32) - return 32; - if (type == ctx->f64) - return 64; - - unreachable("Unhandled type kind in get_elem_bits"); -} - -unsigned -ac_get_type_size(LLVMTypeRef type) -{ - LLVMTypeKind kind = LLVMGetTypeKind(type); - - switch (kind) { - case LLVMIntegerTypeKind: - return LLVMGetIntTypeWidth(type) / 8; - case LLVMHalfTypeKind: - return 2; - case LLVMFloatTypeKind: - return 4; - case LLVMDoubleTypeKind: - return 8; - case LLVMPointerTypeKind: - if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT) - return 4; - return 8; - case LLVMVectorTypeKind: - return LLVMGetVectorSize(type) * - ac_get_type_size(LLVMGetElementType(type)); - case LLVMArrayTypeKind: - return LLVMGetArrayLength(type) * - ac_get_type_size(LLVMGetElementType(type)); - default: - assert(0); - return 0; - } +void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler, + enum chip_class chip_class, enum radeon_family family, + enum ac_float_mode float_mode, unsigned wave_size, + unsigned ballot_mask_bits) +{ + ctx->context = LLVMContextCreate(); + + ctx->chip_class = chip_class; + ctx->family = family; + ctx->wave_size = wave_size; + ctx->ballot_mask_bits = 
ballot_mask_bits; + ctx->float_mode = float_mode; + ctx->module = + ac_create_module(wave_size == 32 ? compiler->tm_wave32 : compiler->tm, ctx->context); + ctx->builder = ac_create_builder(ctx->context, float_mode); + + ctx->voidt = LLVMVoidTypeInContext(ctx->context); + ctx->i1 = LLVMInt1TypeInContext(ctx->context); + ctx->i8 = LLVMInt8TypeInContext(ctx->context); + ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); + ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); + ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); + ctx->i128 = LLVMIntTypeInContext(ctx->context, 128); + ctx->intptr = ctx->i32; + ctx->f16 = LLVMHalfTypeInContext(ctx->context); + ctx->f32 = LLVMFloatTypeInContext(ctx->context); + ctx->f64 = LLVMDoubleTypeInContext(ctx->context); + ctx->v2i16 = LLVMVectorType(ctx->i16, 2); + ctx->v4i16 = LLVMVectorType(ctx->i16, 4); + ctx->v2f16 = LLVMVectorType(ctx->f16, 2); + ctx->v4f16 = LLVMVectorType(ctx->f16, 4); + ctx->v2i32 = LLVMVectorType(ctx->i32, 2); + ctx->v3i32 = LLVMVectorType(ctx->i32, 3); + ctx->v4i32 = LLVMVectorType(ctx->i32, 4); + ctx->v2f32 = LLVMVectorType(ctx->f32, 2); + ctx->v3f32 = LLVMVectorType(ctx->f32, 3); + ctx->v4f32 = LLVMVectorType(ctx->f32, 4); + ctx->v8i32 = LLVMVectorType(ctx->i32, 8); + ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size); + ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits); + + ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false); + ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false); + ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false); + ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false); + ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false); + ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false); + ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false); + ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false); + ctx->i128_0 = LLVMConstInt(ctx->i128, 0, false); + ctx->i128_1 = LLVMConstInt(ctx->i128, 1, false); + ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0); + ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0); + ctx->f32_0 = 
LLVMConstReal(ctx->f32, 0.0); + ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0); + ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0); + ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0); + + ctx->i1false = LLVMConstInt(ctx->i1, 0, false); + ctx->i1true = LLVMConstInt(ctx->i1, 1, false); + + ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context, "range", 5); + + ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context, "invariant.load", 14); + + ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14); + + ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); + ctx->flow = calloc(1, sizeof(*ctx->flow)); +} + +void ac_llvm_context_dispose(struct ac_llvm_context *ctx) +{ + free(ctx->flow->stack); + free(ctx->flow); + ctx->flow = NULL; +} + +int ac_get_llvm_num_components(LLVMValueRef value) +{ + LLVMTypeRef type = LLVMTypeOf(value); + unsigned num_components = + LLVMGetTypeKind(type) == LLVMVectorTypeKind ? LLVMGetVectorSize(type) : 1; + return num_components; +} + +LLVMValueRef ac_llvm_extract_elem(struct ac_llvm_context *ac, LLVMValueRef value, int index) +{ + if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) { + assert(index == 0); + return value; + } + + return LLVMBuildExtractElement(ac->builder, value, LLVMConstInt(ac->i32, index, false), ""); +} + +int ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type) +{ + if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) + type = LLVMGetElementType(type); + + if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind) + return LLVMGetIntTypeWidth(type); + + if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) { + if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_LDS) + return 32; + } + + if (type == ctx->f16) + return 16; + if (type == ctx->f32) + return 32; + if (type == ctx->f64) + return 64; + + unreachable("Unhandled type kind in get_elem_bits"); +} + +unsigned ac_get_type_size(LLVMTypeRef type) +{ + LLVMTypeKind kind = LLVMGetTypeKind(type); + + switch (kind) { + case 
LLVMIntegerTypeKind: + return LLVMGetIntTypeWidth(type) / 8; + case LLVMHalfTypeKind: + return 2; + case LLVMFloatTypeKind: + return 4; + case LLVMDoubleTypeKind: + return 8; + case LLVMPointerTypeKind: + if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT) + return 4; + return 8; + case LLVMVectorTypeKind: + return LLVMGetVectorSize(type) * ac_get_type_size(LLVMGetElementType(type)); + case LLVMArrayTypeKind: + return LLVMGetArrayLength(type) * ac_get_type_size(LLVMGetElementType(type)); + default: + assert(0); + return 0; + } } static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) { - if (t == ctx->i8) - return ctx->i8; - else if (t == ctx->f16 || t == ctx->i16) - return ctx->i16; - else if (t == ctx->f32 || t == ctx->i32) - return ctx->i32; - else if (t == ctx->f64 || t == ctx->i64) - return ctx->i64; - else - unreachable("Unhandled integer size"); -} - -LLVMTypeRef -ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t) -{ - if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { - LLVMTypeRef elem_type = LLVMGetElementType(t); - return LLVMVectorType(to_integer_type_scalar(ctx, elem_type), - LLVMGetVectorSize(t)); - } - if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) { - switch (LLVMGetPointerAddressSpace(t)) { - case AC_ADDR_SPACE_GLOBAL: - return ctx->i64; - case AC_ADDR_SPACE_CONST_32BIT: - case AC_ADDR_SPACE_LDS: - return ctx->i32; - default: - unreachable("unhandled address space"); - } - } - return to_integer_type_scalar(ctx, t); -} - -LLVMValueRef -ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v) -{ - LLVMTypeRef type = LLVMTypeOf(v); - if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) { - return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), ""); - } - return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), ""); -} - -LLVMValueRef -ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v) -{ - LLVMTypeRef type = LLVMTypeOf(v); - if 
(LLVMGetTypeKind(type) == LLVMPointerTypeKind) - return v; - return ac_to_integer(ctx, v); + if (t == ctx->i8) + return ctx->i8; + else if (t == ctx->f16 || t == ctx->i16) + return ctx->i16; + else if (t == ctx->f32 || t == ctx->i32) + return ctx->i32; + else if (t == ctx->f64 || t == ctx->i64) + return ctx->i64; + else + unreachable("Unhandled integer size"); +} + +LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t) +{ + if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { + LLVMTypeRef elem_type = LLVMGetElementType(t); + return LLVMVectorType(to_integer_type_scalar(ctx, elem_type), LLVMGetVectorSize(t)); + } + if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) { + switch (LLVMGetPointerAddressSpace(t)) { + case AC_ADDR_SPACE_GLOBAL: + return ctx->i64; + case AC_ADDR_SPACE_CONST_32BIT: + case AC_ADDR_SPACE_LDS: + return ctx->i32; + default: + unreachable("unhandled address space"); + } + } + return to_integer_type_scalar(ctx, t); +} + +LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v) +{ + LLVMTypeRef type = LLVMTypeOf(v); + if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) { + return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), ""); + } + return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), ""); +} + +LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v) +{ + LLVMTypeRef type = LLVMTypeOf(v); + if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) + return v; + return ac_to_integer(ctx, v); } static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) { - if (t == ctx->i8) - return ctx->i8; - else if (t == ctx->i16 || t == ctx->f16) - return ctx->f16; - else if (t == ctx->i32 || t == ctx->f32) - return ctx->f32; - else if (t == ctx->i64 || t == ctx->f64) - return ctx->f64; - else - unreachable("Unhandled float size"); -} - -LLVMTypeRef -ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t) -{ - if (LLVMGetTypeKind(t) == 
LLVMVectorTypeKind) { - LLVMTypeRef elem_type = LLVMGetElementType(t); - return LLVMVectorType(to_float_type_scalar(ctx, elem_type), - LLVMGetVectorSize(t)); - } - return to_float_type_scalar(ctx, t); -} - -LLVMValueRef -ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v) -{ - LLVMTypeRef type = LLVMTypeOf(v); - return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), ""); -} - - -LLVMValueRef -ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name, - LLVMTypeRef return_type, LLVMValueRef *params, - unsigned param_count, unsigned attrib_mask) -{ - LLVMValueRef function, call; - bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY); - - function = LLVMGetNamedFunction(ctx->module, name); - if (!function) { - LLVMTypeRef param_types[32], function_type; - unsigned i; - - assert(param_count <= 32); - - for (i = 0; i < param_count; ++i) { - assert(params[i]); - param_types[i] = LLVMTypeOf(params[i]); - } - function_type = - LLVMFunctionType(return_type, param_types, param_count, 0); - function = LLVMAddFunction(ctx->module, name, function_type); - - LLVMSetFunctionCallConv(function, LLVMCCallConv); - LLVMSetLinkage(function, LLVMExternalLinkage); - - if (!set_callsite_attrs) - ac_add_func_attributes(ctx->context, function, attrib_mask); - } - - call = LLVMBuildCall(ctx->builder, function, params, param_count, ""); - if (set_callsite_attrs) - ac_add_func_attributes(ctx->context, call, attrib_mask); - return call; + if (t == ctx->i8) + return ctx->i8; + else if (t == ctx->i16 || t == ctx->f16) + return ctx->f16; + else if (t == ctx->i32 || t == ctx->f32) + return ctx->f32; + else if (t == ctx->i64 || t == ctx->f64) + return ctx->f64; + else + unreachable("Unhandled float size"); +} + +LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t) +{ + if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { + LLVMTypeRef elem_type = LLVMGetElementType(t); + return LLVMVectorType(to_float_type_scalar(ctx, elem_type), 
LLVMGetVectorSize(t)); + } + return to_float_type_scalar(ctx, t); +} + +LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v) +{ + LLVMTypeRef type = LLVMTypeOf(v); + return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), ""); +} + +LLVMValueRef ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name, + LLVMTypeRef return_type, LLVMValueRef *params, unsigned param_count, + unsigned attrib_mask) +{ + LLVMValueRef function, call; + bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY); + + function = LLVMGetNamedFunction(ctx->module, name); + if (!function) { + LLVMTypeRef param_types[32], function_type; + unsigned i; + + assert(param_count <= 32); + + for (i = 0; i < param_count; ++i) { + assert(params[i]); + param_types[i] = LLVMTypeOf(params[i]); + } + function_type = LLVMFunctionType(return_type, param_types, param_count, 0); + function = LLVMAddFunction(ctx->module, name, function_type); + + LLVMSetFunctionCallConv(function, LLVMCCallConv); + LLVMSetLinkage(function, LLVMExternalLinkage); + + if (!set_callsite_attrs) + ac_add_func_attributes(ctx->context, function, attrib_mask); + } + + call = LLVMBuildCall(ctx->builder, function, params, param_count, ""); + if (set_callsite_attrs) + ac_add_func_attributes(ctx->context, call, attrib_mask); + return call; } /** @@ -351,59 +321,55 @@ */ void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize) { - LLVMTypeRef elem_type = type; + LLVMTypeRef elem_type = type; - assert(bufsize >= 8); + assert(bufsize >= 8); - if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { - int ret = snprintf(buf, bufsize, "v%u", - LLVMGetVectorSize(type)); - if (ret < 0) { - char *type_name = LLVMPrintTypeToString(type); - fprintf(stderr, "Error building type name for: %s\n", - type_name); - LLVMDisposeMessage(type_name); - return; - } - elem_type = LLVMGetElementType(type); - buf += ret; - bufsize -= ret; - } - switch (LLVMGetTypeKind(elem_type)) { - default: break; - case 
LLVMIntegerTypeKind: - snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type)); - break; - case LLVMHalfTypeKind: - snprintf(buf, bufsize, "f16"); - break; - case LLVMFloatTypeKind: - snprintf(buf, bufsize, "f32"); - break; - case LLVMDoubleTypeKind: - snprintf(buf, bufsize, "f64"); - break; - } + if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { + int ret = snprintf(buf, bufsize, "v%u", LLVMGetVectorSize(type)); + if (ret < 0) { + char *type_name = LLVMPrintTypeToString(type); + fprintf(stderr, "Error building type name for: %s\n", type_name); + LLVMDisposeMessage(type_name); + return; + } + elem_type = LLVMGetElementType(type); + buf += ret; + bufsize -= ret; + } + switch (LLVMGetTypeKind(elem_type)) { + default: + break; + case LLVMIntegerTypeKind: + snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type)); + break; + case LLVMHalfTypeKind: + snprintf(buf, bufsize, "f16"); + break; + case LLVMFloatTypeKind: + snprintf(buf, bufsize, "f32"); + break; + case LLVMDoubleTypeKind: + snprintf(buf, bufsize, "f64"); + break; + } } /** * Helper function that builds an LLVM IR PHI node and immediately adds * incoming edges. 
*/ -LLVMValueRef -ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, - unsigned count_incoming, LLVMValueRef *values, - LLVMBasicBlockRef *blocks) -{ - LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, ""); - LLVMAddIncoming(phi, values, blocks, count_incoming); - return phi; +LLVMValueRef ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, unsigned count_incoming, + LLVMValueRef *values, LLVMBasicBlockRef *blocks) +{ + LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, ""); + LLVMAddIncoming(phi, values, blocks, count_incoming); + return phi; } void ac_build_s_barrier(struct ac_llvm_context *ctx) { - ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL, - 0, AC_FUNC_ATTR_CONVERGENT); + ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); } /* Prevent optimizations (at least of memory accesses) across the current @@ -413,375 +379,328 @@ * Optionally, a value can be passed through the inline assembly to prevent * LLVM from hoisting calls to ReadNone functions. 
*/ -void -ac_build_optimization_barrier(struct ac_llvm_context *ctx, - LLVMValueRef *pvgpr) -{ - static int counter = 0; - - LLVMBuilderRef builder = ctx->builder; - char code[16]; - - snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter)); - - if (!pvgpr) { - LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); - LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false); - LLVMBuildCall(builder, inlineasm, NULL, 0, ""); - } else { - LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false); - LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false); - LLVMTypeRef type = LLVMTypeOf(*pvgpr); - unsigned bitsize = ac_get_elem_bits(ctx, type); - LLVMValueRef vgpr = *pvgpr; - LLVMTypeRef vgpr_type; - unsigned vgpr_size; - LLVMValueRef vgpr0; - - if (bitsize < 32) - vgpr = LLVMBuildZExt(ctx->builder, vgpr, ctx->i32, ""); - - vgpr_type = LLVMTypeOf(vgpr); - vgpr_size = ac_get_type_size(vgpr_type); - - assert(vgpr_size % 4 == 0); - - vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), ""); - vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, ""); - vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, ""); - vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, ""); - vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, ""); - - if (bitsize < 32) - vgpr = LLVMBuildTrunc(builder, vgpr, type, ""); - - *pvgpr = vgpr; - } -} - -LLVMValueRef -ac_build_shader_clock(struct ac_llvm_context *ctx, nir_scope scope) -{ - const char *name = scope == NIR_SCOPE_DEVICE ? 
"llvm.amdgcn.s.memrealtime" : "llvm.amdgcn.s.memtime"; - LLVMValueRef tmp = ac_build_intrinsic(ctx, name, ctx->i64, NULL, 0, 0); - return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, ""); -} - -LLVMValueRef -ac_build_ballot(struct ac_llvm_context *ctx, - LLVMValueRef value) -{ - const char *name; - - if (LLVM_VERSION_MAJOR >= 9) { - if (ctx->wave_size == 64) - name = "llvm.amdgcn.icmp.i64.i32"; - else - name = "llvm.amdgcn.icmp.i32.i32"; - } else { - name = "llvm.amdgcn.icmp.i32"; - } - LLVMValueRef args[3] = { - value, - ctx->i32_0, - LLVMConstInt(ctx->i32, LLVMIntNE, 0) - }; - - /* We currently have no other way to prevent LLVM from lifting the icmp - * calls to a dominating basic block. - */ - ac_build_optimization_barrier(ctx, &args[0]); - - args[0] = ac_to_integer(ctx, args[0]); - - return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3, - AC_FUNC_ATTR_NOUNWIND | - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); -} - -LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx, - LLVMValueRef value) -{ - const char *name; - - if (LLVM_VERSION_MAJOR >= 9) { - if (ctx->wave_size == 64) - name = "llvm.amdgcn.icmp.i64.i1"; - else - name = "llvm.amdgcn.icmp.i32.i1"; - } else { - name = "llvm.amdgcn.icmp.i1"; - } - LLVMValueRef args[3] = { - value, - ctx->i1false, - LLVMConstInt(ctx->i32, LLVMIntNE, 0), - }; - - return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3, - AC_FUNC_ATTR_NOUNWIND | - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); -} - -LLVMValueRef -ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value) -{ - LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1); - LLVMValueRef vote_set = ac_build_ballot(ctx, value); - return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, ""); -} - -LLVMValueRef -ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value) -{ - LLVMValueRef vote_set = ac_build_ballot(ctx, value); - return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set, - 
LLVMConstInt(ctx->iN_wavemask, 0, 0), ""); -} - -LLVMValueRef -ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value) -{ - LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1); - LLVMValueRef vote_set = ac_build_ballot(ctx, value); - - LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ, - vote_set, active_set, ""); - LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ, - vote_set, - LLVMConstInt(ctx->iN_wavemask, 0, 0), ""); - return LLVMBuildOr(ctx->builder, all, none, ""); -} - -LLVMValueRef -ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, - unsigned value_count, unsigned component) -{ - LLVMValueRef vec = NULL; - - if (value_count == 1) { - return values[component]; - } else if (!value_count) - unreachable("value_count is 0"); - - for (unsigned i = component; i < value_count + component; i++) { - LLVMValueRef value = values[i]; - - if (i == component) - vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count)); - LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false); - vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, ""); - } - return vec; -} - -LLVMValueRef -ac_build_gather_values_extended(struct ac_llvm_context *ctx, - LLVMValueRef *values, - unsigned value_count, - unsigned value_stride, - bool load, - bool always_vector) -{ - LLVMBuilderRef builder = ctx->builder; - LLVMValueRef vec = NULL; - unsigned i; - - if (value_count == 1 && !always_vector) { - if (load) - return LLVMBuildLoad(builder, values[0], ""); - return values[0]; - } else if (!value_count) - unreachable("value_count is 0"); - - for (i = 0; i < value_count; i++) { - LLVMValueRef value = values[i * value_stride]; - if (load) - value = LLVMBuildLoad(builder, value, ""); - - if (!i) - vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count)); - LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); - vec = LLVMBuildInsertElement(builder, vec, value, index, ""); - } - return vec; -} - 
-LLVMValueRef -ac_build_gather_values(struct ac_llvm_context *ctx, - LLVMValueRef *values, - unsigned value_count) +void ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pvgpr) { - return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false); + static int counter = 0; + + LLVMBuilderRef builder = ctx->builder; + char code[16]; + + snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter)); + + if (!pvgpr) { + LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); + LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false); + LLVMBuildCall(builder, inlineasm, NULL, 0, ""); + } else { + LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false); + LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false); + LLVMTypeRef type = LLVMTypeOf(*pvgpr); + unsigned bitsize = ac_get_elem_bits(ctx, type); + LLVMValueRef vgpr = *pvgpr; + LLVMTypeRef vgpr_type; + unsigned vgpr_size; + LLVMValueRef vgpr0; + + if (bitsize < 32) + vgpr = LLVMBuildZExt(ctx->builder, vgpr, ctx->i32, ""); + + vgpr_type = LLVMTypeOf(vgpr); + vgpr_size = ac_get_type_size(vgpr_type); + + assert(vgpr_size % 4 == 0); + + vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), ""); + vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, ""); + vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, ""); + vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, ""); + vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, ""); + + if (bitsize < 32) + vgpr = LLVMBuildTrunc(builder, vgpr, type, ""); + + *pvgpr = vgpr; + } +} + +LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx, nir_scope scope) +{ + const char *name = + scope == NIR_SCOPE_DEVICE ? 
"llvm.amdgcn.s.memrealtime" : "llvm.amdgcn.s.memtime"; + LLVMValueRef tmp = ac_build_intrinsic(ctx, name, ctx->i64, NULL, 0, 0); + return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, ""); +} + +LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value) +{ + const char *name; + + if (LLVM_VERSION_MAJOR >= 9) { + if (ctx->wave_size == 64) + name = "llvm.amdgcn.icmp.i64.i32"; + else + name = "llvm.amdgcn.icmp.i32.i32"; + } else { + name = "llvm.amdgcn.icmp.i32"; + } + LLVMValueRef args[3] = {value, ctx->i32_0, LLVMConstInt(ctx->i32, LLVMIntNE, 0)}; + + /* We currently have no other way to prevent LLVM from lifting the icmp + * calls to a dominating basic block. + */ + ac_build_optimization_barrier(ctx, &args[0]); + + args[0] = ac_to_integer(ctx, args[0]); + + return ac_build_intrinsic( + ctx, name, ctx->iN_wavemask, args, 3, + AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); +} + +LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx, LLVMValueRef value) +{ + const char *name; + + if (LLVM_VERSION_MAJOR >= 9) { + if (ctx->wave_size == 64) + name = "llvm.amdgcn.icmp.i64.i1"; + else + name = "llvm.amdgcn.icmp.i32.i1"; + } else { + name = "llvm.amdgcn.icmp.i1"; + } + LLVMValueRef args[3] = { + value, + ctx->i1false, + LLVMConstInt(ctx->i32, LLVMIntNE, 0), + }; + + return ac_build_intrinsic( + ctx, name, ctx->iN_wavemask, args, 3, + AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); +} + +LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value) +{ + LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1); + LLVMValueRef vote_set = ac_build_ballot(ctx, value); + return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, ""); +} + +LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value) +{ + LLVMValueRef vote_set = ac_build_ballot(ctx, value); + return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set, LLVMConstInt(ctx->iN_wavemask, 0, 0), + 
""); +} + +LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value) +{ + LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1); + LLVMValueRef vote_set = ac_build_ballot(ctx, value); + + LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, ""); + LLVMValueRef none = + LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, LLVMConstInt(ctx->iN_wavemask, 0, 0), ""); + return LLVMBuildOr(ctx->builder, all, none, ""); +} + +LLVMValueRef ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count, unsigned component) +{ + LLVMValueRef vec = NULL; + + if (value_count == 1) { + return values[component]; + } else if (!value_count) + unreachable("value_count is 0"); + + for (unsigned i = component; i < value_count + component; i++) { + LLVMValueRef value = values[i]; + + if (i == component) + vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count)); + LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false); + vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, ""); + } + return vec; +} + +LLVMValueRef ac_build_gather_values_extended(struct ac_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count, unsigned value_stride, bool load, + bool always_vector) +{ + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef vec = NULL; + unsigned i; + + if (value_count == 1 && !always_vector) { + if (load) + return LLVMBuildLoad(builder, values[0], ""); + return values[0]; + } else if (!value_count) + unreachable("value_count is 0"); + + for (i = 0; i < value_count; i++) { + LLVMValueRef value = values[i * value_stride]; + if (load) + value = LLVMBuildLoad(builder, value, ""); + + if (!i) + vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count)); + LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); + vec = LLVMBuildInsertElement(builder, vec, value, index, ""); + } + return vec; +} + +LLVMValueRef ac_build_gather_values(struct ac_llvm_context 
*ctx, LLVMValueRef *values, + unsigned value_count) +{ + return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false); } /* Expand a scalar or vector to by filling the remaining * channels with undef. Extract at most src_channels components from the input. */ -static LLVMValueRef -ac_build_expand(struct ac_llvm_context *ctx, - LLVMValueRef value, - unsigned src_channels, - unsigned dst_channels) -{ - LLVMTypeRef elemtype; - LLVMValueRef chan[dst_channels]; - - if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) { - unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value)); - - if (src_channels == dst_channels && vec_size == dst_channels) - return value; - - src_channels = MIN2(src_channels, vec_size); - - for (unsigned i = 0; i < src_channels; i++) - chan[i] = ac_llvm_extract_elem(ctx, value, i); - - elemtype = LLVMGetElementType(LLVMTypeOf(value)); - } else { - if (src_channels) { - assert(src_channels == 1); - chan[0] = value; - } - elemtype = LLVMTypeOf(value); - } +static LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx, LLVMValueRef value, + unsigned src_channels, unsigned dst_channels) +{ + LLVMTypeRef elemtype; + LLVMValueRef chan[dst_channels]; + + if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) { + unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value)); - for (unsigned i = src_channels; i < dst_channels; i++) - chan[i] = LLVMGetUndef(elemtype); + if (src_channels == dst_channels && vec_size == dst_channels) + return value; - return ac_build_gather_values(ctx, chan, dst_channels); + src_channels = MIN2(src_channels, vec_size); + + for (unsigned i = 0; i < src_channels; i++) + chan[i] = ac_llvm_extract_elem(ctx, value, i); + + elemtype = LLVMGetElementType(LLVMTypeOf(value)); + } else { + if (src_channels) { + assert(src_channels == 1); + chan[0] = value; + } + elemtype = LLVMTypeOf(value); + } + + for (unsigned i = src_channels; i < dst_channels; i++) + chan[i] = LLVMGetUndef(elemtype); + + return 
ac_build_gather_values(ctx, chan, dst_channels); } /* Extract components [start, start + channels) from a vector. */ -LLVMValueRef -ac_extract_components(struct ac_llvm_context *ctx, - LLVMValueRef value, - unsigned start, - unsigned channels) +LLVMValueRef ac_extract_components(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned start, + unsigned channels) { - LLVMValueRef chan[channels]; + LLVMValueRef chan[channels]; - for (unsigned i = 0; i < channels; i++) - chan[i] = ac_llvm_extract_elem(ctx, value, i + start); + for (unsigned i = 0; i < channels; i++) + chan[i] = ac_llvm_extract_elem(ctx, value, i + start); - return ac_build_gather_values(ctx, chan, channels); + return ac_build_gather_values(ctx, chan, channels); } /* Expand a scalar or vector to <4 x type> by filling the remaining channels * with undef. Extract at most num_channels components from the input. */ -LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, - LLVMValueRef value, - unsigned num_channels) +LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, LLVMValueRef value, + unsigned num_channels) { - return ac_build_expand(ctx, value, num_channels, 4); + return ac_build_expand(ctx, value, num_channels, 4); } LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value) { - unsigned type_size = ac_get_type_size(LLVMTypeOf(value)); - const char *name; + unsigned type_size = ac_get_type_size(LLVMTypeOf(value)); + const char *name; + + if (type_size == 2) + name = "llvm.rint.f16"; + else if (type_size == 4) + name = "llvm.rint.f32"; + else + name = "llvm.rint.f64"; + + return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1, AC_FUNC_ATTR_READNONE); +} + +LLVMValueRef ac_build_fdiv(struct ac_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den) +{ + unsigned type_size = ac_get_type_size(LLVMTypeOf(den)); + const char *name; + + /* For doubles, we need precise division to pass GLCTS. 
*/ + if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && type_size == 8) + return LLVMBuildFDiv(ctx->builder, num, den, ""); - if (type_size == 2) - name = "llvm.rint.f16"; - else if (type_size == 4) - name = "llvm.rint.f32"; - else - name = "llvm.rint.f64"; - - return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1, - AC_FUNC_ATTR_READNONE); -} - -LLVMValueRef -ac_build_fdiv(struct ac_llvm_context *ctx, - LLVMValueRef num, - LLVMValueRef den) -{ - unsigned type_size = ac_get_type_size(LLVMTypeOf(den)); - const char *name; - - /* For doubles, we need precise division to pass GLCTS. */ - if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && - type_size == 8) - return LLVMBuildFDiv(ctx->builder, num, den, ""); - - if (type_size == 2) - name = "llvm.amdgcn.rcp.f16"; - else if (type_size == 4) - name = "llvm.amdgcn.rcp.f32"; - else - name = "llvm.amdgcn.rcp.f64"; + if (type_size == 2) + name = "llvm.amdgcn.rcp.f16"; + else if (type_size == 4) + name = "llvm.amdgcn.rcp.f32"; + else + name = "llvm.amdgcn.rcp.f64"; - LLVMValueRef rcp = ac_build_intrinsic(ctx, name, LLVMTypeOf(den), - &den, 1, AC_FUNC_ATTR_READNONE); + LLVMValueRef rcp = + ac_build_intrinsic(ctx, name, LLVMTypeOf(den), &den, 1, AC_FUNC_ATTR_READNONE); - return LLVMBuildFMul(ctx->builder, num, rcp, ""); + return LLVMBuildFMul(ctx->builder, num, rcp, ""); } /* See fast_idiv_by_const.h. */ /* Set: increment = util_fast_udiv_info::increment ? 
multiplier : 0; */ -LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, - LLVMValueRef num, - LLVMValueRef multiplier, - LLVMValueRef pre_shift, - LLVMValueRef post_shift, - LLVMValueRef increment) -{ - LLVMBuilderRef builder = ctx->builder; - - num = LLVMBuildLShr(builder, num, pre_shift, ""); - num = LLVMBuildMul(builder, - LLVMBuildZExt(builder, num, ctx->i64, ""), - LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); - num = LLVMBuildAdd(builder, num, - LLVMBuildZExt(builder, increment, ctx->i64, ""), ""); - num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); - num = LLVMBuildTrunc(builder, num, ctx->i32, ""); - return LLVMBuildLShr(builder, num, post_shift, ""); +LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, LLVMValueRef num, + LLVMValueRef multiplier, LLVMValueRef pre_shift, + LLVMValueRef post_shift, LLVMValueRef increment) +{ + LLVMBuilderRef builder = ctx->builder; + + num = LLVMBuildLShr(builder, num, pre_shift, ""); + num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""), + LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); + num = LLVMBuildAdd(builder, num, LLVMBuildZExt(builder, increment, ctx->i64, ""), ""); + num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); + num = LLVMBuildTrunc(builder, num, ctx->i32, ""); + return LLVMBuildLShr(builder, num, post_shift, ""); } /* See fast_idiv_by_const.h. */ /* If num != UINT_MAX, this more efficient version can be used. 
*/ /* Set: increment = util_fast_udiv_info::increment; */ -LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, - LLVMValueRef num, - LLVMValueRef multiplier, - LLVMValueRef pre_shift, - LLVMValueRef post_shift, - LLVMValueRef increment) -{ - LLVMBuilderRef builder = ctx->builder; - - num = LLVMBuildLShr(builder, num, pre_shift, ""); - num = LLVMBuildNUWAdd(builder, num, increment, ""); - num = LLVMBuildMul(builder, - LLVMBuildZExt(builder, num, ctx->i64, ""), - LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); - num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); - num = LLVMBuildTrunc(builder, num, ctx->i32, ""); - return LLVMBuildLShr(builder, num, post_shift, ""); +LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, LLVMValueRef num, + LLVMValueRef multiplier, LLVMValueRef pre_shift, + LLVMValueRef post_shift, LLVMValueRef increment) +{ + LLVMBuilderRef builder = ctx->builder; + + num = LLVMBuildLShr(builder, num, pre_shift, ""); + num = LLVMBuildNUWAdd(builder, num, increment, ""); + num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""), + LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); + num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); + num = LLVMBuildTrunc(builder, num, ctx->i32, ""); + return LLVMBuildLShr(builder, num, post_shift, ""); } /* See fast_idiv_by_const.h. */ /* Both operands must fit in 31 bits and the divisor must not be 1. 
*/ -LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, - LLVMValueRef num, - LLVMValueRef multiplier, - LLVMValueRef post_shift) -{ - LLVMBuilderRef builder = ctx->builder; - - num = LLVMBuildMul(builder, - LLVMBuildZExt(builder, num, ctx->i64, ""), - LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); - num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); - num = LLVMBuildTrunc(builder, num, ctx->i32, ""); - return LLVMBuildLShr(builder, num, post_shift, ""); +LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMValueRef num, + LLVMValueRef multiplier, LLVMValueRef post_shift) +{ + LLVMBuilderRef builder = ctx->builder; + + num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""), + LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); + num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); + num = LLVMBuildTrunc(builder, num, ctx->i32, ""); + return LLVMBuildLShr(builder, num, post_shift, ""); } /* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27 @@ -789,26 +708,20 @@ * already multiplied by two. id is the cube face number. 
*/ struct cube_selection_coords { - LLVMValueRef stc[2]; - LLVMValueRef ma; - LLVMValueRef id; + LLVMValueRef stc[2]; + LLVMValueRef ma; + LLVMValueRef id; }; -static void -build_cube_intrinsic(struct ac_llvm_context *ctx, - LLVMValueRef in[3], - struct cube_selection_coords *out) -{ - LLVMTypeRef f32 = ctx->f32; - - out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc", - f32, in, 3, AC_FUNC_ATTR_READNONE); - out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc", - f32, in, 3, AC_FUNC_ATTR_READNONE); - out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema", - f32, in, 3, AC_FUNC_ATTR_READNONE); - out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid", - f32, in, 3, AC_FUNC_ATTR_READNONE); +static void build_cube_intrinsic(struct ac_llvm_context *ctx, LLVMValueRef in[3], + struct cube_selection_coords *out) +{ + LLVMTypeRef f32 = ctx->f32; + + out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc", f32, in, 3, AC_FUNC_ATTR_READNONE); + out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc", f32, in, 3, AC_FUNC_ATTR_READNONE); + out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema", f32, in, 3, AC_FUNC_ATTR_READNONE); + out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid", f32, in, 3, AC_FUNC_ATTR_READNONE); } /** @@ -821,281 +734,250 @@ * the selcoords major axis. 
*/ static void build_cube_select(struct ac_llvm_context *ctx, - const struct cube_selection_coords *selcoords, - const LLVMValueRef *coords, - LLVMValueRef *out_st, - LLVMValueRef *out_ma) -{ - LLVMBuilderRef builder = ctx->builder; - LLVMTypeRef f32 = LLVMTypeOf(coords[0]); - LLVMValueRef is_ma_positive; - LLVMValueRef sgn_ma; - LLVMValueRef is_ma_z, is_not_ma_z; - LLVMValueRef is_ma_y; - LLVMValueRef is_ma_x; - LLVMValueRef sgn; - LLVMValueRef tmp; - - is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, - selcoords->ma, LLVMConstReal(f32, 0.0), ""); - sgn_ma = LLVMBuildSelect(builder, is_ma_positive, - LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), ""); - - is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), ""); - is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); - is_ma_y = LLVMBuildAnd(builder, is_not_ma_z, - LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), ""); - is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), ""); - - /* Select sc */ - tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], ""); - sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0), - LLVMBuildSelect(builder, is_ma_z, sgn_ma, - LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); - out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); - - /* Select tc */ - tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); - sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma, - LLVMConstReal(f32, -1.0), ""); - out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); - - /* Select ma */ - tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], - LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), ""); - tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32", - ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE); - *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), ""); -} - -void -ac_prepare_cube_coords(struct ac_llvm_context *ctx, - bool is_deriv, bool is_array, bool is_lod, - LLVMValueRef 
*coords_arg, - LLVMValueRef *derivs_arg) -{ - - LLVMBuilderRef builder = ctx->builder; - struct cube_selection_coords selcoords; - LLVMValueRef coords[3]; - LLVMValueRef invma; - - if (is_array && !is_lod) { - LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]); - - /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says: - * - * "For Array forms, the array layer used will be - * - * max(0, min(d−1, floor(layer+0.5))) - * - * where d is the depth of the texture array and layer - * comes from the component indicated in the tables below. - * Workaroudn for an issue where the layer is taken from a - * helper invocation which happens to fall on a different - * layer due to extrapolation." - * - * GFX8 and earlier attempt to implement this in hardware by - * clamping the value of coords[2] = (8 * layer) + face. - * Unfortunately, this means that the we end up with the wrong - * face when clamping occurs. - * - * Clamp the layer earlier to work around the issue. - */ - if (ctx->chip_class <= GFX8) { - LLVMValueRef ge0; - ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, ""); - tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, ""); - } - - coords_arg[3] = tmp; - } - - build_cube_intrinsic(ctx, coords_arg, &selcoords); - - invma = ac_build_intrinsic(ctx, "llvm.fabs.f32", - ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); - invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); - - for (int i = 0; i < 2; ++i) - coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, ""); - - coords[2] = selcoords.id; - - if (is_deriv && derivs_arg) { - LLVMValueRef derivs[4]; - int axis; - - /* Convert cube derivatives to 2D derivatives. */ - for (axis = 0; axis < 2; axis++) { - LLVMValueRef deriv_st[2]; - LLVMValueRef deriv_ma; - - /* Transform the derivative alongside the texture - * coordinate. Mathematically, the correct formula is - * as follows. 
Assume we're projecting onto the +Z face - * and denote by dx/dh the derivative of the (original) - * X texture coordinate with respect to horizontal - * window coordinates. The projection onto the +Z face - * plane is: - * - * f(x,z) = x/z - * - * Then df/dh = df/dx * dx/dh + df/dz * dz/dh - * = 1/z * dx/dh - x/z * 1/z * dz/dh. - * - * This motivatives the implementation below. - * - * Whether this actually gives the expected results for - * apps that might feed in derivatives obtained via - * finite differences is anyone's guess. The OpenGL spec - * seems awfully quiet about how textureGrad for cube - * maps should be handled. - */ - build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3], - deriv_st, &deriv_ma); - - deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, ""); - - for (int i = 0; i < 2; ++i) - derivs[axis * 2 + i] = - LLVMBuildFSub(builder, - LLVMBuildFMul(builder, deriv_st[i], invma, ""), - LLVMBuildFMul(builder, deriv_ma, coords[i], ""), ""); - } - - memcpy(derivs_arg, derivs, sizeof(derivs)); - } - - /* Shift the texture coordinate. This must be applied after the - * derivative calculation. 
- */ - for (int i = 0; i < 2; ++i) - coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), ""); - - if (is_array) { - /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ - /* coords_arg.w component - array_index for cube arrays */ - coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]); - } - - memcpy(coords_arg, coords, sizeof(coords)); -} - - -LLVMValueRef -ac_build_fs_interp(struct ac_llvm_context *ctx, - LLVMValueRef llvm_chan, - LLVMValueRef attr_number, - LLVMValueRef params, - LLVMValueRef i, - LLVMValueRef j) -{ - LLVMValueRef args[5]; - LLVMValueRef p1; - - args[0] = i; - args[1] = llvm_chan; - args[2] = attr_number; - args[3] = params; - - p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1", - ctx->f32, args, 4, AC_FUNC_ATTR_READNONE); - - args[0] = p1; - args[1] = j; - args[2] = llvm_chan; - args[3] = attr_number; - args[4] = params; - - return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2", - ctx->f32, args, 5, AC_FUNC_ATTR_READNONE); -} - -LLVMValueRef -ac_build_fs_interp_f16(struct ac_llvm_context *ctx, - LLVMValueRef llvm_chan, - LLVMValueRef attr_number, - LLVMValueRef params, - LLVMValueRef i, - LLVMValueRef j) -{ - LLVMValueRef args[6]; - LLVMValueRef p1; - - args[0] = i; - args[1] = llvm_chan; - args[2] = attr_number; - args[3] = ctx->i1false; - args[4] = params; - - p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16", - ctx->f32, args, 5, AC_FUNC_ATTR_READNONE); - - args[0] = p1; - args[1] = j; - args[2] = llvm_chan; - args[3] = attr_number; - args[4] = ctx->i1false; - args[5] = params; - - return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16", - ctx->f16, args, 6, AC_FUNC_ATTR_READNONE); -} - -LLVMValueRef -ac_build_fs_interp_mov(struct ac_llvm_context *ctx, - LLVMValueRef parameter, - LLVMValueRef llvm_chan, - LLVMValueRef attr_number, - LLVMValueRef params) -{ - LLVMValueRef args[4]; - - args[0] = parameter; - args[1] = llvm_chan; - args[2] = attr_number; - args[3] 
= params; - - return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov", - ctx->f32, args, 4, AC_FUNC_ATTR_READNONE); -} - -LLVMValueRef -ac_build_gep_ptr(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, - LLVMValueRef index) -{ - return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, ""); -} - -LLVMValueRef -ac_build_gep0(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, - LLVMValueRef index) -{ - LLVMValueRef indices[2] = { - ctx->i32_0, - index, - }; - return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, ""); -} - -LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr, - LLVMValueRef index) -{ - return LLVMBuildPointerCast(ctx->builder, - LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""), - LLVMTypeOf(ptr), ""); -} - -void -ac_build_indexed_store(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index, - LLVMValueRef value) + const struct cube_selection_coords *selcoords, + const LLVMValueRef *coords, LLVMValueRef *out_st, + LLVMValueRef *out_ma) +{ + LLVMBuilderRef builder = ctx->builder; + LLVMTypeRef f32 = LLVMTypeOf(coords[0]); + LLVMValueRef is_ma_positive; + LLVMValueRef sgn_ma; + LLVMValueRef is_ma_z, is_not_ma_z; + LLVMValueRef is_ma_y; + LLVMValueRef is_ma_x; + LLVMValueRef sgn; + LLVMValueRef tmp; + + is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->ma, LLVMConstReal(f32, 0.0), ""); + sgn_ma = LLVMBuildSelect(builder, is_ma_positive, LLVMConstReal(f32, 1.0), + LLVMConstReal(f32, -1.0), ""); + + is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), ""); + is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); + is_ma_y = LLVMBuildAnd( + builder, is_not_ma_z, + LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), ""); + is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), ""); + + /* Select sc */ + tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], ""); + sgn = LLVMBuildSelect( + builder, is_ma_y, 
LLVMConstReal(f32, 1.0), + LLVMBuildSelect(builder, is_ma_z, sgn_ma, LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); + out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); + + /* Select tc */ + tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); + sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma, LLVMConstReal(f32, -1.0), ""); + out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); + + /* Select ma */ + tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], + LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), ""); + tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE); + *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), ""); +} + +void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod, + LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg) +{ + + LLVMBuilderRef builder = ctx->builder; + struct cube_selection_coords selcoords; + LLVMValueRef coords[3]; + LLVMValueRef invma; + + if (is_array && !is_lod) { + LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]); + + /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says: + * + * "For Array forms, the array layer used will be + * + * max(0, min(d−1, floor(layer+0.5))) + * + * where d is the depth of the texture array and layer + * comes from the component indicated in the tables below. + * Workaroudn for an issue where the layer is taken from a + * helper invocation which happens to fall on a different + * layer due to extrapolation." + * + * GFX8 and earlier attempt to implement this in hardware by + * clamping the value of coords[2] = (8 * layer) + face. + * Unfortunately, this means that the we end up with the wrong + * face when clamping occurs. + * + * Clamp the layer earlier to work around the issue. 
+ */ + if (ctx->chip_class <= GFX8) { + LLVMValueRef ge0; + ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, ""); + tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, ""); + } + + coords_arg[3] = tmp; + } + + build_cube_intrinsic(ctx, coords_arg, &selcoords); + + invma = + ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); + invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); + + for (int i = 0; i < 2; ++i) + coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, ""); + + coords[2] = selcoords.id; + + if (is_deriv && derivs_arg) { + LLVMValueRef derivs[4]; + int axis; + + /* Convert cube derivatives to 2D derivatives. */ + for (axis = 0; axis < 2; axis++) { + LLVMValueRef deriv_st[2]; + LLVMValueRef deriv_ma; + + /* Transform the derivative alongside the texture + * coordinate. Mathematically, the correct formula is + * as follows. Assume we're projecting onto the +Z face + * and denote by dx/dh the derivative of the (original) + * X texture coordinate with respect to horizontal + * window coordinates. The projection onto the +Z face + * plane is: + * + * f(x,z) = x/z + * + * Then df/dh = df/dx * dx/dh + df/dz * dz/dh + * = 1/z * dx/dh - x/z * 1/z * dz/dh. + * + * This motivatives the implementation below. + * + * Whether this actually gives the expected results for + * apps that might feed in derivatives obtained via + * finite differences is anyone's guess. The OpenGL spec + * seems awfully quiet about how textureGrad for cube + * maps should be handled. + */ + build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3], deriv_st, &deriv_ma); + + deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, ""); + + for (int i = 0; i < 2; ++i) + derivs[axis * 2 + i] = + LLVMBuildFSub(builder, LLVMBuildFMul(builder, deriv_st[i], invma, ""), + LLVMBuildFMul(builder, deriv_ma, coords[i], ""), ""); + } + + memcpy(derivs_arg, derivs, sizeof(derivs)); + } + + /* Shift the texture coordinate. 
This must be applied after the + * derivative calculation. + */ + for (int i = 0; i < 2; ++i) + coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), ""); + + if (is_array) { + /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ + /* coords_arg.w component - array_index for cube arrays */ + coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]); + } + + memcpy(coords_arg, coords, sizeof(coords)); +} + +LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan, + LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i, + LLVMValueRef j) +{ + LLVMValueRef args[5]; + LLVMValueRef p1; + + args[0] = i; + args[1] = llvm_chan; + args[2] = attr_number; + args[3] = params; + + p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1", ctx->f32, args, 4, AC_FUNC_ATTR_READNONE); + + args[0] = p1; + args[1] = j; + args[2] = llvm_chan; + args[3] = attr_number; + args[4] = params; + + return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2", ctx->f32, args, 5, + AC_FUNC_ATTR_READNONE); +} + +LLVMValueRef ac_build_fs_interp_f16(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan, + LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i, + LLVMValueRef j) +{ + LLVMValueRef args[6]; + LLVMValueRef p1; + + args[0] = i; + args[1] = llvm_chan; + args[2] = attr_number; + args[3] = ctx->i1false; + args[4] = params; + + p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16", ctx->f32, args, 5, + AC_FUNC_ATTR_READNONE); + + args[0] = p1; + args[1] = j; + args[2] = llvm_chan; + args[3] = attr_number; + args[4] = ctx->i1false; + args[5] = params; + + return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16", ctx->f16, args, 6, + AC_FUNC_ATTR_READNONE); +} + +LLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef parameter, + LLVMValueRef llvm_chan, LLVMValueRef attr_number, + LLVMValueRef params) +{ + LLVMValueRef args[4]; + + args[0] = parameter; + args[1] = 
llvm_chan; + args[2] = attr_number; + args[3] = params; + + return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov", ctx->f32, args, 4, + AC_FUNC_ATTR_READNONE); +} + +LLVMValueRef ac_build_gep_ptr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, + LLVMValueRef index) +{ + return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, ""); +} + +LLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index) +{ + LLVMValueRef indices[2] = { + ctx->i32_0, + index, + }; + return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, ""); +} + +LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr, LLVMValueRef index) +{ + return LLVMBuildPointerCast(ctx->builder, LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""), + LLVMTypeOf(ptr), ""); +} + +void ac_build_indexed_store(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index, + LLVMValueRef value) { - LLVMBuildStore(ctx->builder, value, - ac_build_gep0(ctx, base_ptr, index)); + LLVMBuildStore(ctx->builder, value, ac_build_gep0(ctx, base_ptr, index)); } /** @@ -1126,425 +1008,328 @@ * ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize); * sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds */ -static LLVMValueRef -ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, - LLVMValueRef index, bool uniform, bool invariant, - bool no_unsigned_wraparound) +static LLVMValueRef ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, + LLVMValueRef index, bool uniform, bool invariant, + bool no_unsigned_wraparound) { - LLVMValueRef pointer, result; + LLVMValueRef pointer, result; - if (no_unsigned_wraparound && - LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT) - pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, &index, 1, ""); - else - pointer = LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, ""); + if (no_unsigned_wraparound && + LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == 
AC_ADDR_SPACE_CONST_32BIT) + pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, &index, 1, ""); + else + pointer = LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, ""); - if (uniform) - LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md); - result = LLVMBuildLoad(ctx->builder, pointer, ""); - if (invariant) - LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); - return result; + if (uniform) + LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md); + result = LLVMBuildLoad(ctx->builder, pointer, ""); + if (invariant) + LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); + return result; } -LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, - LLVMValueRef index) +LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index) { - return ac_build_load_custom(ctx, base_ptr, index, false, false, false); + return ac_build_load_custom(ctx, base_ptr, index, false, false, false); } -LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index) +LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, + LLVMValueRef index) { - return ac_build_load_custom(ctx, base_ptr, index, false, true, false); + return ac_build_load_custom(ctx, base_ptr, index, false, true, false); } /* This assumes that there is no unsigned integer wraparound during the address * computation, excluding all GEPs within base_ptr. */ -LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index) +LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, + LLVMValueRef index) { - return ac_build_load_custom(ctx, base_ptr, index, true, true, true); + return ac_build_load_custom(ctx, base_ptr, index, true, true, true); } /* See ac_build_load_custom() documentation. 
*/ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index) + LLVMValueRef base_ptr, LLVMValueRef index) { - return ac_build_load_custom(ctx, base_ptr, index, true, true, false); + return ac_build_load_custom(ctx, base_ptr, index, true, true, false); } -static unsigned get_load_cache_policy(struct ac_llvm_context *ctx, - unsigned cache_policy) +static unsigned get_load_cache_policy(struct ac_llvm_context *ctx, unsigned cache_policy) { - return cache_policy | - (ctx->chip_class >= GFX10 && cache_policy & ac_glc ? ac_dlc : 0); -} - -static void -ac_build_buffer_store_common(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef data, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned cache_policy, - bool use_format, - bool structurized) -{ - LLVMValueRef args[6]; - int idx = 0; - args[idx++] = data; - args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); - if (structurized) - args[idx++] = vindex ? vindex : ctx->i32_0; - args[idx++] = voffset ? voffset : ctx->i32_0; - args[idx++] = soffset ? soffset : ctx->i32_0; - args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0); - const char *indexing_kind = structurized ? "struct" : "raw"; - char name[256], type_name[8]; - - ac_build_type_name_for_intr(LLVMTypeOf(data), type_name, sizeof(type_name)); - - if (use_format) { - snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s", - indexing_kind, type_name); - } else { - snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s", - indexing_kind, type_name); - } - - ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, - AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY); -} - -void -ac_build_buffer_store_format(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef data, - LLVMValueRef vindex, - LLVMValueRef voffset, - unsigned cache_policy) + return cache_policy | (ctx->chip_class >= GFX10 && cache_policy & ac_glc ? 
ac_dlc : 0); +} + +static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef data, LLVMValueRef vindex, + LLVMValueRef voffset, LLVMValueRef soffset, + unsigned cache_policy, bool use_format, bool structurized) { - ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, - cache_policy, true, true); + LLVMValueRef args[6]; + int idx = 0; + args[idx++] = data; + args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); + if (structurized) + args[idx++] = vindex ? vindex : ctx->i32_0; + args[idx++] = voffset ? voffset : ctx->i32_0; + args[idx++] = soffset ? soffset : ctx->i32_0; + args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0); + const char *indexing_kind = structurized ? "struct" : "raw"; + char name[256], type_name[8]; + + ac_build_type_name_for_intr(LLVMTypeOf(data), type_name, sizeof(type_name)); + + if (use_format) { + snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s", indexing_kind, + type_name); + } else { + snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s", indexing_kind, type_name); + } + + ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY); +} + +void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data, + LLVMValueRef vindex, LLVMValueRef voffset, unsigned cache_policy) +{ + ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, cache_policy, true, true); } /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), * or v4i32 (num_channels=3,4). 
*/ -void -ac_build_buffer_store_dword(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - unsigned num_channels, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned inst_offset, - unsigned cache_policy) -{ - /* Split 3 channel stores, because only LLVM 9+ support 3-channel - * intrinsics. */ - if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) { - LLVMValueRef v[3], v01; - - for (int i = 0; i < 3; i++) { - v[i] = LLVMBuildExtractElement(ctx->builder, vdata, - LLVMConstInt(ctx->i32, i, 0), ""); - } - v01 = ac_build_gather_values(ctx, v, 2); - - ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset, - soffset, inst_offset, cache_policy); - ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset, - soffset, inst_offset + 8, - cache_policy); - return; - } - - /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset - * (voffset is swizzled, but soffset isn't swizzled). - * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter. 
- */ - if (!(cache_policy & ac_swizzled)) { - LLVMValueRef offset = soffset; - - if (inst_offset) - offset = LLVMBuildAdd(ctx->builder, offset, - LLVMConstInt(ctx->i32, inst_offset, 0), ""); - - ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), - ctx->i32_0, voffset, offset, - cache_policy, false, false); - return; - } - - static const unsigned dfmts[] = { - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_DATA_FORMAT_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32_32 - }; - unsigned dfmt = dfmts[num_channels - 1]; - unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; - LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0); - - ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, - immoffset, num_channels, dfmt, nfmt, cache_policy); -} - -static LLVMValueRef -ac_build_buffer_load_common(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned num_channels, - LLVMTypeRef channel_type, - unsigned cache_policy, - bool can_speculate, - bool use_format, - bool structurized) -{ - LLVMValueRef args[5]; - int idx = 0; - args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); - if (structurized) - args[idx++] = vindex ? vindex : ctx->i32_0; - args[idx++] = voffset ? voffset : ctx->i32_0; - args[idx++] = soffset ? soffset : ctx->i32_0; - args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0); - unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels; - const char *indexing_kind = structurized ? "struct" : "raw"; - char name[256], type_name[8]; - - /* D16 is only supported on gfx8+ */ - assert(!use_format || - (channel_type != ctx->f16 && channel_type != ctx->i16) || - ctx->chip_class >= GFX8); - - LLVMTypeRef type = func > 1 ? 
LLVMVectorType(channel_type, func) : channel_type; - ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); - - if (use_format) { - snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s", - indexing_kind, type_name); - } else { - snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s", - indexing_kind, type_name); - } - - return ac_build_intrinsic(ctx, name, type, args, idx, - ac_get_load_intr_attribs(can_speculate)); -} - -LLVMValueRef -ac_build_buffer_load(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - int num_channels, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned inst_offset, - unsigned cache_policy, - bool can_speculate, - bool allow_smem) -{ - LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0); - if (voffset) - offset = LLVMBuildAdd(ctx->builder, offset, voffset, ""); - if (soffset) - offset = LLVMBuildAdd(ctx->builder, offset, soffset, ""); - - if (allow_smem && !(cache_policy & ac_slc) && - (!(cache_policy & ac_glc) || ctx->chip_class >= GFX8)) { - assert(vindex == NULL); - - LLVMValueRef result[8]; - - for (int i = 0; i < num_channels; i++) { - if (i) { - offset = LLVMBuildAdd(ctx->builder, offset, - LLVMConstInt(ctx->i32, 4, 0), ""); - } - LLVMValueRef args[3] = { - rsrc, - offset, - LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0), - }; - result[i] = ac_build_intrinsic(ctx, - "llvm.amdgcn.s.buffer.load.f32", - ctx->f32, args, 3, - AC_FUNC_ATTR_READNONE); - } - if (num_channels == 1) - return result[0]; - - if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) - result[num_channels++] = LLVMGetUndef(ctx->f32); - return ac_build_gather_values(ctx, result, num_channels); - } - - return ac_build_buffer_load_common(ctx, rsrc, vindex, - offset, ctx->i32_0, - num_channels, ctx->f32, - cache_policy, - can_speculate, false, false); -} - -LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vindex, 
- LLVMValueRef voffset, - unsigned num_channels, - unsigned cache_policy, - bool can_speculate, - bool d16) -{ - return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, - ctx->i32_0, num_channels, - d16 ? ctx->f16 : ctx->f32, - cache_policy, can_speculate, - true, true); -} - -static LLVMValueRef -ac_build_tbuffer_load(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy, - bool can_speculate, - bool structurized) -{ - voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); - - LLVMValueRef args[6]; - int idx = 0; - args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); - if (structurized) - args[idx++] = vindex ? vindex : ctx->i32_0; - args[idx++] = voffset ? voffset : ctx->i32_0; - args[idx++] = soffset ? soffset : ctx->i32_0; - args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0); - args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0); - unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels; - const char *indexing_kind = structurized ? "struct" : "raw"; - char name[256], type_name[8]; - - LLVMTypeRef type = func > 1 ? 
LLVMVectorType(ctx->i32, func) : ctx->i32; - ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); - - snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s", - indexing_kind, type_name); - - return ac_build_intrinsic(ctx, name, type, args, idx, - ac_get_load_intr_attribs(can_speculate)); -} - -LLVMValueRef -ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy, - bool can_speculate) -{ - return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset, - immoffset, num_channels, dfmt, nfmt, - cache_policy, can_speculate, true); -} - -LLVMValueRef -ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy, - bool can_speculate) -{ - return ac_build_tbuffer_load(ctx, rsrc, NULL, voffset, soffset, - immoffset, num_channels, dfmt, nfmt, - cache_policy, can_speculate, false); -} - -LLVMValueRef -ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned cache_policy) -{ - LLVMValueRef res; - - if (LLVM_VERSION_MAJOR >= 9) { - voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); - - /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. 
*/ - res = ac_build_buffer_load_common(ctx, rsrc, NULL, - voffset, soffset, - 1, ctx->i16, cache_policy, - false, false, false); - } else { - unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16; - unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; - - res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset, - immoffset, 1, dfmt, nfmt, cache_policy, - false); - - res = LLVMBuildTrunc(ctx->builder, res, ctx->i16, ""); - } - - return res; -} - -LLVMValueRef -ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned cache_policy) -{ - LLVMValueRef res; - - if (LLVM_VERSION_MAJOR >= 9) { - voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); - - /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ - res = ac_build_buffer_load_common(ctx, rsrc, NULL, - voffset, soffset, - 1, ctx->i8, cache_policy, - false, false, false); - } else { - unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8; - unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; - - res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset, - immoffset, 1, dfmt, nfmt, cache_policy, - false); +void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, + unsigned num_channels, LLVMValueRef voffset, LLVMValueRef soffset, + unsigned inst_offset, unsigned cache_policy) +{ + /* Split 3 channel stores, because only LLVM 9+ support 3-channel + * intrinsics. 
*/ + if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) { + LLVMValueRef v[3], v01; + + for (int i = 0; i < 3; i++) { + v[i] = LLVMBuildExtractElement(ctx->builder, vdata, LLVMConstInt(ctx->i32, i, 0), ""); + } + v01 = ac_build_gather_values(ctx, v, 2); + + ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset, soffset, inst_offset, cache_policy); + ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset, soffset, inst_offset + 8, + cache_policy); + return; + } + + /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset + * (voffset is swizzled, but soffset isn't swizzled). + * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter. + */ + if (!(cache_policy & ac_swizzled)) { + LLVMValueRef offset = soffset; + + if (inst_offset) + offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, inst_offset, 0), ""); + + ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), ctx->i32_0, voffset, offset, + cache_policy, false, false); + return; + } + + static const unsigned dfmts[] = {V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32, + V_008F0C_BUF_DATA_FORMAT_32_32_32, + V_008F0C_BUF_DATA_FORMAT_32_32_32_32}; + unsigned dfmt = dfmts[num_channels - 1]; + unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; + LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0); + + ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, immoffset, num_channels, dfmt, + nfmt, cache_policy); +} + +static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vindex, LLVMValueRef voffset, + LLVMValueRef soffset, unsigned num_channels, + LLVMTypeRef channel_type, unsigned cache_policy, + bool can_speculate, bool use_format, + bool structurized) +{ + LLVMValueRef args[5]; + int idx = 0; + args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); + if (structurized) + args[idx++] = vindex ? vindex : ctx->i32_0; + args[idx++] = voffset ? 
voffset : ctx->i32_0; + args[idx++] = soffset ? soffset : ctx->i32_0; + args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0); + unsigned func = + !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels; + const char *indexing_kind = structurized ? "struct" : "raw"; + char name[256], type_name[8]; + + /* D16 is only supported on gfx8+ */ + assert(!use_format || (channel_type != ctx->f16 && channel_type != ctx->i16) || + ctx->chip_class >= GFX8); + + LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type; + ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); + + if (use_format) { + snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s", indexing_kind, + type_name); + } else { + snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s", indexing_kind, type_name); + } + + return ac_build_intrinsic(ctx, name, type, args, idx, ac_get_load_intr_attribs(can_speculate)); +} + +LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels, + LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, + unsigned inst_offset, unsigned cache_policy, bool can_speculate, + bool allow_smem) +{ + LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0); + if (voffset) + offset = LLVMBuildAdd(ctx->builder, offset, voffset, ""); + if (soffset) + offset = LLVMBuildAdd(ctx->builder, offset, soffset, ""); + + if (allow_smem && !(cache_policy & ac_slc) && + (!(cache_policy & ac_glc) || ctx->chip_class >= GFX8)) { + assert(vindex == NULL); + + LLVMValueRef result[8]; + + for (int i = 0; i < num_channels; i++) { + if (i) { + offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, 4, 0), ""); + } + LLVMValueRef args[3] = { + rsrc, + offset, + LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0), + }; + result[i] = ac_build_intrinsic(ctx, "llvm.amdgcn.s.buffer.load.f32", ctx->f32, args, 3, + 
AC_FUNC_ATTR_READNONE); + } + if (num_channels == 1) + return result[0]; + + if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) + result[num_channels++] = LLVMGetUndef(ctx->f32); + return ac_build_gather_values(ctx, result, num_channels); + } + + return ac_build_buffer_load_common(ctx, rsrc, vindex, offset, ctx->i32_0, num_channels, ctx->f32, + cache_policy, can_speculate, false, false); +} + +LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vindex, LLVMValueRef voffset, + unsigned num_channels, unsigned cache_policy, + bool can_speculate, bool d16) +{ + return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, num_channels, + d16 ? ctx->f16 : ctx->f32, cache_policy, can_speculate, true, + true); +} + +static LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vindex, LLVMValueRef voffset, + LLVMValueRef soffset, LLVMValueRef immoffset, + unsigned num_channels, unsigned dfmt, unsigned nfmt, + unsigned cache_policy, bool can_speculate, + bool structurized) +{ + voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); + + LLVMValueRef args[6]; + int idx = 0; + args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); + if (structurized) + args[idx++] = vindex ? vindex : ctx->i32_0; + args[idx++] = voffset ? voffset : ctx->i32_0; + args[idx++] = soffset ? soffset : ctx->i32_0; + args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0); + args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0); + unsigned func = + !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels; + const char *indexing_kind = structurized ? "struct" : "raw"; + char name[256], type_name[8]; + + LLVMTypeRef type = func > 1 ? 
LLVMVectorType(ctx->i32, func) : ctx->i32; + ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); + + snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s", indexing_kind, type_name); + + return ac_build_intrinsic(ctx, name, type, args, idx, ac_get_load_intr_attribs(can_speculate)); +} + +LLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vindex, LLVMValueRef voffset, + LLVMValueRef soffset, LLVMValueRef immoffset, + unsigned num_channels, unsigned dfmt, unsigned nfmt, + unsigned cache_policy, bool can_speculate) +{ + return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset, immoffset, num_channels, dfmt, + nfmt, cache_policy, can_speculate, true); +} + +LLVMValueRef ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef voffset, LLVMValueRef soffset, + LLVMValueRef immoffset, unsigned num_channels, unsigned dfmt, + unsigned nfmt, unsigned cache_policy, bool can_speculate) +{ + return ac_build_tbuffer_load(ctx, rsrc, NULL, voffset, soffset, immoffset, num_channels, dfmt, + nfmt, cache_policy, can_speculate, false); +} + +LLVMValueRef ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef voffset, LLVMValueRef soffset, + LLVMValueRef immoffset, unsigned cache_policy) +{ + LLVMValueRef res; + + if (LLVM_VERSION_MAJOR >= 9) { + voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); + + /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. 
*/ + res = ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i16, + cache_policy, false, false, false); + } else { + unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16; + unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; + + res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset, immoffset, 1, dfmt, nfmt, + cache_policy, false); + + res = LLVMBuildTrunc(ctx->builder, res, ctx->i16, ""); + } + + return res; +} + +LLVMValueRef ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef voffset, LLVMValueRef soffset, + LLVMValueRef immoffset, unsigned cache_policy) +{ + LLVMValueRef res; + + if (LLVM_VERSION_MAJOR >= 9) { + voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); + + /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ + res = ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i8, cache_policy, + false, false, false); + } else { + unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8; + unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; + + res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset, immoffset, 1, dfmt, nfmt, + cache_policy, false); - res = LLVMBuildTrunc(ctx->builder, res, ctx->i8, ""); - } + res = LLVMBuildTrunc(ctx->builder, res, ctx->i8, ""); + } - return res; + return res; } /** @@ -1553,62 +1338,63 @@ * The input exponent is expected to be biased analogous to IEEE-754, i.e. by * 2^(exp_bits-1) - 1 (as defined in OpenGL and other graphics APIs). 
*/ -static LLVMValueRef -ac_ufN_to_float(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned exp_bits, unsigned mant_bits) +static LLVMValueRef ac_ufN_to_float(struct ac_llvm_context *ctx, LLVMValueRef src, + unsigned exp_bits, unsigned mant_bits) { - assert(LLVMTypeOf(src) == ctx->i32); + assert(LLVMTypeOf(src) == ctx->i32); - LLVMValueRef tmp; - LLVMValueRef mantissa; - mantissa = LLVMBuildAnd(ctx->builder, src, LLVMConstInt(ctx->i32, (1 << mant_bits) - 1, false), ""); - - /* Converting normal numbers is just a shift + correcting the exponent bias */ - unsigned normal_shift = 23 - mant_bits; - unsigned bias_shift = 127 - ((1 << (exp_bits - 1)) - 1); - LLVMValueRef shifted, normal; - - shifted = LLVMBuildShl(ctx->builder, src, LLVMConstInt(ctx->i32, normal_shift, false), ""); - normal = LLVMBuildAdd(ctx->builder, shifted, LLVMConstInt(ctx->i32, bias_shift << 23, false), ""); - - /* Converting nan/inf numbers is the same, but with a different exponent update */ - LLVMValueRef naninf; - naninf = LLVMBuildOr(ctx->builder, normal, LLVMConstInt(ctx->i32, 0xff << 23, false), ""); - - /* Converting denormals is the complex case: determine the leading zeros of the - * mantissa to obtain the correct shift for the mantissa and exponent correction. - */ - LLVMValueRef denormal; - LLVMValueRef params[2] = { - mantissa, - ctx->i1true, /* result can be undef when arg is 0 */ - }; - LLVMValueRef ctlz = ac_build_intrinsic(ctx, "llvm.ctlz.i32", ctx->i32, - params, 2, AC_FUNC_ATTR_READNONE); - - /* Shift such that the leading 1 ends up as the LSB of the exponent field. 
*/ - tmp = LLVMBuildSub(ctx->builder, ctlz, LLVMConstInt(ctx->i32, 8, false), ""); - denormal = LLVMBuildShl(ctx->builder, mantissa, tmp, ""); - - unsigned denormal_exp = bias_shift + (32 - mant_bits) - 1; - tmp = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, denormal_exp, false), ctlz, ""); - tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(ctx->i32, 23, false), ""); - denormal = LLVMBuildAdd(ctx->builder, denormal, tmp, ""); - - /* Select the final result. */ - LLVMValueRef result; - - tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src, - LLVMConstInt(ctx->i32, ((1 << exp_bits) - 1) << mant_bits, false), ""); - result = LLVMBuildSelect(ctx->builder, tmp, naninf, normal, ""); - - tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src, - LLVMConstInt(ctx->i32, 1 << mant_bits, false), ""); - result = LLVMBuildSelect(ctx->builder, tmp, result, denormal, ""); + LLVMValueRef tmp; + LLVMValueRef mantissa; + mantissa = + LLVMBuildAnd(ctx->builder, src, LLVMConstInt(ctx->i32, (1 << mant_bits) - 1, false), ""); + + /* Converting normal numbers is just a shift + correcting the exponent bias */ + unsigned normal_shift = 23 - mant_bits; + unsigned bias_shift = 127 - ((1 << (exp_bits - 1)) - 1); + LLVMValueRef shifted, normal; + + shifted = LLVMBuildShl(ctx->builder, src, LLVMConstInt(ctx->i32, normal_shift, false), ""); + normal = + LLVMBuildAdd(ctx->builder, shifted, LLVMConstInt(ctx->i32, bias_shift << 23, false), ""); + + /* Converting nan/inf numbers is the same, but with a different exponent update */ + LLVMValueRef naninf; + naninf = LLVMBuildOr(ctx->builder, normal, LLVMConstInt(ctx->i32, 0xff << 23, false), ""); + + /* Converting denormals is the complex case: determine the leading zeros of the + * mantissa to obtain the correct shift for the mantissa and exponent correction. 
+ */ + LLVMValueRef denormal; + LLVMValueRef params[2] = { + mantissa, ctx->i1true, /* result can be undef when arg is 0 */ + }; + LLVMValueRef ctlz = + ac_build_intrinsic(ctx, "llvm.ctlz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE); + + /* Shift such that the leading 1 ends up as the LSB of the exponent field. */ + tmp = LLVMBuildSub(ctx->builder, ctlz, LLVMConstInt(ctx->i32, 8, false), ""); + denormal = LLVMBuildShl(ctx->builder, mantissa, tmp, ""); + + unsigned denormal_exp = bias_shift + (32 - mant_bits) - 1; + tmp = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, denormal_exp, false), ctlz, ""); + tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(ctx->i32, 23, false), ""); + denormal = LLVMBuildAdd(ctx->builder, denormal, tmp, ""); + + /* Select the final result. */ + LLVMValueRef result; + + tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src, + LLVMConstInt(ctx->i32, ((1 << exp_bits) - 1) << mant_bits, false), ""); + result = LLVMBuildSelect(ctx->builder, tmp, naninf, normal, ""); + + tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src, LLVMConstInt(ctx->i32, 1 << mant_bits, false), + ""); + result = LLVMBuildSelect(ctx->builder, tmp, result, denormal, ""); - tmp = LLVMBuildICmp(ctx->builder, LLVMIntNE, src, ctx->i32_0, ""); - result = LLVMBuildSelect(ctx->builder, tmp, result, ctx->i32_0, ""); + tmp = LLVMBuildICmp(ctx->builder, LLVMIntNE, src, ctx->i32_0, ""); + result = LLVMBuildSelect(ctx->builder, tmp, result, ctx->i32_0, ""); - return ac_to_float(ctx, result); + return ac_to_float(ctx, result); } /** @@ -1629,354 +1415,305 @@ * \param rsrc buffer resource descriptor * \return the resulting vector of floats or integers bitcast to <4 x i32> */ -LLVMValueRef -ac_build_opencoded_load_format(struct ac_llvm_context *ctx, - unsigned log_size, - unsigned num_channels, - unsigned format, - bool reverse, - bool known_aligned, - LLVMValueRef rsrc, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned cache_policy, - bool 
can_speculate) -{ - LLVMValueRef tmp; - unsigned load_log_size = log_size; - unsigned load_num_channels = num_channels; - if (log_size == 3) { - load_log_size = 2; - if (format == AC_FETCH_FORMAT_FLOAT) { - load_num_channels = 2 * num_channels; - } else { - load_num_channels = 1; /* 10_11_11 or 2_10_10_10 */ - } - } - - int log_recombine = 0; - if ((ctx->chip_class == GFX6 || ctx->chip_class >= GFX10) && !known_aligned) { - /* Avoid alignment restrictions by loading one byte at a time. */ - load_num_channels <<= load_log_size; - log_recombine = load_log_size; - load_log_size = 0; - } else if (load_num_channels == 2 || load_num_channels == 4) { - log_recombine = -util_logbase2(load_num_channels); - load_num_channels = 1; - load_log_size += -log_recombine; - } - - assert(load_log_size >= 2 || LLVM_VERSION_MAJOR >= 9); - - LLVMValueRef loads[32]; /* up to 32 bytes */ - for (unsigned i = 0; i < load_num_channels; ++i) { - tmp = LLVMBuildAdd(ctx->builder, soffset, - LLVMConstInt(ctx->i32, i << load_log_size, false), ""); - LLVMTypeRef channel_type = load_log_size == 0 ? ctx->i8 : - load_log_size == 1 ? ctx->i16 : ctx->i32; - unsigned num_channels = 1 << (MAX2(load_log_size, 2) - 2); - loads[i] = ac_build_buffer_load_common( - ctx, rsrc, vindex, voffset, tmp, - num_channels, channel_type, cache_policy, - can_speculate, false, true); - if (load_log_size >= 2) - loads[i] = ac_to_integer(ctx, loads[i]); - } - - if (log_recombine > 0) { - /* Recombine bytes if necessary (GFX6 only) */ - LLVMTypeRef dst_type = log_recombine == 2 ? 
ctx->i32 : ctx->i16; - - for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) { - LLVMValueRef accum = NULL; - for (unsigned i = 0; i < (1 << log_recombine); ++i, ++src) { - tmp = LLVMBuildZExt(ctx->builder, loads[src], dst_type, ""); - if (i == 0) { - accum = tmp; - } else { - tmp = LLVMBuildShl(ctx->builder, tmp, - LLVMConstInt(dst_type, 8 * i, false), ""); - accum = LLVMBuildOr(ctx->builder, accum, tmp, ""); - } - } - loads[dst] = accum; - } - } else if (log_recombine < 0) { - /* Split vectors of dwords */ - if (load_log_size > 2) { - assert(load_num_channels == 1); - LLVMValueRef loaded = loads[0]; - unsigned log_split = load_log_size - 2; - log_recombine += log_split; - load_num_channels = 1 << log_split; - load_log_size = 2; - for (unsigned i = 0; i < load_num_channels; ++i) { - tmp = LLVMConstInt(ctx->i32, i, false); - loads[i] = LLVMBuildExtractElement(ctx->builder, loaded, tmp, ""); - } - } - - /* Further split dwords and shorts if required */ - if (log_recombine < 0) { - for (unsigned src = load_num_channels, - dst = load_num_channels << -log_recombine; - src > 0; --src) { - unsigned dst_bits = 1 << (3 + load_log_size + log_recombine); - LLVMTypeRef dst_type = LLVMIntTypeInContext(ctx->context, dst_bits); - LLVMValueRef loaded = loads[src - 1]; - LLVMTypeRef loaded_type = LLVMTypeOf(loaded); - for (unsigned i = 1 << -log_recombine; i > 0; --i, --dst) { - tmp = LLVMConstInt(loaded_type, dst_bits * (i - 1), false); - tmp = LLVMBuildLShr(ctx->builder, loaded, tmp, ""); - loads[dst - 1] = LLVMBuildTrunc(ctx->builder, tmp, dst_type, ""); - } - } - } - } - - if (log_size == 3) { - if (format == AC_FETCH_FORMAT_FLOAT) { - for (unsigned i = 0; i < num_channels; ++i) { - tmp = ac_build_gather_values(ctx, &loads[2 * i], 2); - loads[i] = LLVMBuildBitCast(ctx->builder, tmp, ctx->f64, ""); - } - } else if (format == AC_FETCH_FORMAT_FIXED) { - /* 10_11_11_FLOAT */ - LLVMValueRef data = loads[0]; - LLVMValueRef i32_2047 = LLVMConstInt(ctx->i32, 2047, false); 
- LLVMValueRef r = LLVMBuildAnd(ctx->builder, data, i32_2047, ""); - tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 11, false), ""); - LLVMValueRef g = LLVMBuildAnd(ctx->builder, tmp, i32_2047, ""); - LLVMValueRef b = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 22, false), ""); - - loads[0] = ac_to_integer(ctx, ac_ufN_to_float(ctx, r, 5, 6)); - loads[1] = ac_to_integer(ctx, ac_ufN_to_float(ctx, g, 5, 6)); - loads[2] = ac_to_integer(ctx, ac_ufN_to_float(ctx, b, 5, 5)); - - num_channels = 3; - log_size = 2; - format = AC_FETCH_FORMAT_FLOAT; - } else { - /* 2_10_10_10 data formats */ - LLVMValueRef data = loads[0]; - LLVMTypeRef i10 = LLVMIntTypeInContext(ctx->context, 10); - LLVMTypeRef i2 = LLVMIntTypeInContext(ctx->context, 2); - loads[0] = LLVMBuildTrunc(ctx->builder, data, i10, ""); - tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 10, false), ""); - loads[1] = LLVMBuildTrunc(ctx->builder, tmp, i10, ""); - tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 20, false), ""); - loads[2] = LLVMBuildTrunc(ctx->builder, tmp, i10, ""); - tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 30, false), ""); - loads[3] = LLVMBuildTrunc(ctx->builder, tmp, i2, ""); - - num_channels = 4; - } - } - - if (format == AC_FETCH_FORMAT_FLOAT) { - if (log_size != 2) { - for (unsigned chan = 0; chan < num_channels; ++chan) { - tmp = ac_to_float(ctx, loads[chan]); - if (log_size == 3) - tmp = LLVMBuildFPTrunc(ctx->builder, tmp, ctx->f32, ""); - else if (log_size == 1) - tmp = LLVMBuildFPExt(ctx->builder, tmp, ctx->f32, ""); - loads[chan] = ac_to_integer(ctx, tmp); - } - } - } else if (format == AC_FETCH_FORMAT_UINT) { - if (log_size != 2) { - for (unsigned chan = 0; chan < num_channels; ++chan) - loads[chan] = LLVMBuildZExt(ctx->builder, loads[chan], ctx->i32, ""); - } - } else if (format == AC_FETCH_FORMAT_SINT) { - if (log_size != 2) { - for (unsigned chan = 0; chan < num_channels; ++chan) - loads[chan] = 
LLVMBuildSExt(ctx->builder, loads[chan], ctx->i32, ""); - } - } else { - bool unsign = format == AC_FETCH_FORMAT_UNORM || - format == AC_FETCH_FORMAT_USCALED || - format == AC_FETCH_FORMAT_UINT; - - for (unsigned chan = 0; chan < num_channels; ++chan) { - if (unsign) { - tmp = LLVMBuildUIToFP(ctx->builder, loads[chan], ctx->f32, ""); - } else { - tmp = LLVMBuildSIToFP(ctx->builder, loads[chan], ctx->f32, ""); - } - - LLVMValueRef scale = NULL; - if (format == AC_FETCH_FORMAT_FIXED) { - assert(log_size == 2); - scale = LLVMConstReal(ctx->f32, 1.0 / 0x10000); - } else if (format == AC_FETCH_FORMAT_UNORM) { - unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan])); - scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << bits) - 1)); - } else if (format == AC_FETCH_FORMAT_SNORM) { - unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan])); - scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << (bits - 1)) - 1)); - } - if (scale) - tmp = LLVMBuildFMul(ctx->builder, tmp, scale, ""); - - if (format == AC_FETCH_FORMAT_SNORM) { - /* Clamp to [-1, 1] */ - LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0); - LLVMValueRef clamp = - LLVMBuildFCmp(ctx->builder, LLVMRealULT, tmp, neg_one, ""); - tmp = LLVMBuildSelect(ctx->builder, clamp, neg_one, tmp, ""); - } - - loads[chan] = ac_to_integer(ctx, tmp); - } - } - - while (num_channels < 4) { - if (format == AC_FETCH_FORMAT_UINT || format == AC_FETCH_FORMAT_SINT) { - loads[num_channels] = num_channels == 3 ? ctx->i32_1 : ctx->i32_0; - } else { - loads[num_channels] = ac_to_integer(ctx, num_channels == 3 ? 
ctx->f32_1 : ctx->f32_0); - } - num_channels++; - } - - if (reverse) { - tmp = loads[0]; - loads[0] = loads[2]; - loads[2] = tmp; - } - - return ac_build_gather_values(ctx, loads, 4); -} - -static void -ac_build_tbuffer_store(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy, - bool structurized) -{ - voffset = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0, - immoffset, ""); - - LLVMValueRef args[7]; - int idx = 0; - args[idx++] = vdata; - args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); - if (structurized) - args[idx++] = vindex ? vindex : ctx->i32_0; - args[idx++] = voffset ? voffset : ctx->i32_0; - args[idx++] = soffset ? soffset : ctx->i32_0; - args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0); - args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0); - unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels; - const char *indexing_kind = structurized ? "struct" : "raw"; - char name[256], type_name[8]; - - LLVMTypeRef type = func > 1 ? 
LLVMVectorType(ctx->i32, func) : ctx->i32; - ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); - - snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s", - indexing_kind, type_name); - - ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, - AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY); -} - -void -ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy) -{ - ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset, - immoffset, num_channels, dfmt, nfmt, cache_policy, - true); -} - -void -ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy) -{ - ac_build_tbuffer_store(ctx, rsrc, vdata, NULL, voffset, soffset, - immoffset, num_channels, dfmt, nfmt, cache_policy, - false); -} - -void -ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned cache_policy) -{ - vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, ""); - - if (LLVM_VERSION_MAJOR >= 9) { - /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. 
*/ - ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, - voffset, soffset, cache_policy, - false, false); - } else { - unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16; - unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; - - vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, ""); - - ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, - ctx->i32_0, 1, dfmt, nfmt, cache_policy); - } -} - -void -ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned cache_policy) -{ - vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, ""); - - if (LLVM_VERSION_MAJOR >= 9) { - /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ - ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, - voffset, soffset, cache_policy, - false, false); - } else { - unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8; - unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; - - vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, ""); - - ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, - ctx->i32_0, 1, dfmt, nfmt, cache_policy); - } +LLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigned log_size, + unsigned num_channels, unsigned format, bool reverse, + bool known_aligned, LLVMValueRef rsrc, + LLVMValueRef vindex, LLVMValueRef voffset, + LLVMValueRef soffset, unsigned cache_policy, + bool can_speculate) +{ + LLVMValueRef tmp; + unsigned load_log_size = log_size; + unsigned load_num_channels = num_channels; + if (log_size == 3) { + load_log_size = 2; + if (format == AC_FETCH_FORMAT_FLOAT) { + load_num_channels = 2 * num_channels; + } else { + load_num_channels = 1; /* 10_11_11 or 2_10_10_10 */ + } + } + + int log_recombine = 0; + if ((ctx->chip_class == GFX6 || ctx->chip_class >= GFX10) && !known_aligned) { + /* Avoid alignment restrictions by loading one byte at a time. 
*/ + load_num_channels <<= load_log_size; + log_recombine = load_log_size; + load_log_size = 0; + } else if (load_num_channels == 2 || load_num_channels == 4) { + log_recombine = -util_logbase2(load_num_channels); + load_num_channels = 1; + load_log_size += -log_recombine; + } + + assert(load_log_size >= 2 || LLVM_VERSION_MAJOR >= 9); + + LLVMValueRef loads[32]; /* up to 32 bytes */ + for (unsigned i = 0; i < load_num_channels; ++i) { + tmp = + LLVMBuildAdd(ctx->builder, soffset, LLVMConstInt(ctx->i32, i << load_log_size, false), ""); + LLVMTypeRef channel_type = + load_log_size == 0 ? ctx->i8 : load_log_size == 1 ? ctx->i16 : ctx->i32; + unsigned num_channels = 1 << (MAX2(load_log_size, 2) - 2); + loads[i] = + ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, tmp, num_channels, channel_type, + cache_policy, can_speculate, false, true); + if (load_log_size >= 2) + loads[i] = ac_to_integer(ctx, loads[i]); + } + + if (log_recombine > 0) { + /* Recombine bytes if necessary (GFX6 only) */ + LLVMTypeRef dst_type = log_recombine == 2 ? 
ctx->i32 : ctx->i16; + + for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) { + LLVMValueRef accum = NULL; + for (unsigned i = 0; i < (1 << log_recombine); ++i, ++src) { + tmp = LLVMBuildZExt(ctx->builder, loads[src], dst_type, ""); + if (i == 0) { + accum = tmp; + } else { + tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(dst_type, 8 * i, false), ""); + accum = LLVMBuildOr(ctx->builder, accum, tmp, ""); + } + } + loads[dst] = accum; + } + } else if (log_recombine < 0) { + /* Split vectors of dwords */ + if (load_log_size > 2) { + assert(load_num_channels == 1); + LLVMValueRef loaded = loads[0]; + unsigned log_split = load_log_size - 2; + log_recombine += log_split; + load_num_channels = 1 << log_split; + load_log_size = 2; + for (unsigned i = 0; i < load_num_channels; ++i) { + tmp = LLVMConstInt(ctx->i32, i, false); + loads[i] = LLVMBuildExtractElement(ctx->builder, loaded, tmp, ""); + } + } + + /* Further split dwords and shorts if required */ + if (log_recombine < 0) { + for (unsigned src = load_num_channels, dst = load_num_channels << -log_recombine; src > 0; + --src) { + unsigned dst_bits = 1 << (3 + load_log_size + log_recombine); + LLVMTypeRef dst_type = LLVMIntTypeInContext(ctx->context, dst_bits); + LLVMValueRef loaded = loads[src - 1]; + LLVMTypeRef loaded_type = LLVMTypeOf(loaded); + for (unsigned i = 1 << -log_recombine; i > 0; --i, --dst) { + tmp = LLVMConstInt(loaded_type, dst_bits * (i - 1), false); + tmp = LLVMBuildLShr(ctx->builder, loaded, tmp, ""); + loads[dst - 1] = LLVMBuildTrunc(ctx->builder, tmp, dst_type, ""); + } + } + } + } + + if (log_size == 3) { + if (format == AC_FETCH_FORMAT_FLOAT) { + for (unsigned i = 0; i < num_channels; ++i) { + tmp = ac_build_gather_values(ctx, &loads[2 * i], 2); + loads[i] = LLVMBuildBitCast(ctx->builder, tmp, ctx->f64, ""); + } + } else if (format == AC_FETCH_FORMAT_FIXED) { + /* 10_11_11_FLOAT */ + LLVMValueRef data = loads[0]; + LLVMValueRef i32_2047 = LLVMConstInt(ctx->i32, 2047, false); + 
LLVMValueRef r = LLVMBuildAnd(ctx->builder, data, i32_2047, ""); + tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 11, false), ""); + LLVMValueRef g = LLVMBuildAnd(ctx->builder, tmp, i32_2047, ""); + LLVMValueRef b = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 22, false), ""); + + loads[0] = ac_to_integer(ctx, ac_ufN_to_float(ctx, r, 5, 6)); + loads[1] = ac_to_integer(ctx, ac_ufN_to_float(ctx, g, 5, 6)); + loads[2] = ac_to_integer(ctx, ac_ufN_to_float(ctx, b, 5, 5)); + + num_channels = 3; + log_size = 2; + format = AC_FETCH_FORMAT_FLOAT; + } else { + /* 2_10_10_10 data formats */ + LLVMValueRef data = loads[0]; + LLVMTypeRef i10 = LLVMIntTypeInContext(ctx->context, 10); + LLVMTypeRef i2 = LLVMIntTypeInContext(ctx->context, 2); + loads[0] = LLVMBuildTrunc(ctx->builder, data, i10, ""); + tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 10, false), ""); + loads[1] = LLVMBuildTrunc(ctx->builder, tmp, i10, ""); + tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 20, false), ""); + loads[2] = LLVMBuildTrunc(ctx->builder, tmp, i10, ""); + tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 30, false), ""); + loads[3] = LLVMBuildTrunc(ctx->builder, tmp, i2, ""); + + num_channels = 4; + } + } + + if (format == AC_FETCH_FORMAT_FLOAT) { + if (log_size != 2) { + for (unsigned chan = 0; chan < num_channels; ++chan) { + tmp = ac_to_float(ctx, loads[chan]); + if (log_size == 3) + tmp = LLVMBuildFPTrunc(ctx->builder, tmp, ctx->f32, ""); + else if (log_size == 1) + tmp = LLVMBuildFPExt(ctx->builder, tmp, ctx->f32, ""); + loads[chan] = ac_to_integer(ctx, tmp); + } + } + } else if (format == AC_FETCH_FORMAT_UINT) { + if (log_size != 2) { + for (unsigned chan = 0; chan < num_channels; ++chan) + loads[chan] = LLVMBuildZExt(ctx->builder, loads[chan], ctx->i32, ""); + } + } else if (format == AC_FETCH_FORMAT_SINT) { + if (log_size != 2) { + for (unsigned chan = 0; chan < num_channels; ++chan) + loads[chan] = 
LLVMBuildSExt(ctx->builder, loads[chan], ctx->i32, ""); + } + } else { + bool unsign = format == AC_FETCH_FORMAT_UNORM || format == AC_FETCH_FORMAT_USCALED || + format == AC_FETCH_FORMAT_UINT; + + for (unsigned chan = 0; chan < num_channels; ++chan) { + if (unsign) { + tmp = LLVMBuildUIToFP(ctx->builder, loads[chan], ctx->f32, ""); + } else { + tmp = LLVMBuildSIToFP(ctx->builder, loads[chan], ctx->f32, ""); + } + + LLVMValueRef scale = NULL; + if (format == AC_FETCH_FORMAT_FIXED) { + assert(log_size == 2); + scale = LLVMConstReal(ctx->f32, 1.0 / 0x10000); + } else if (format == AC_FETCH_FORMAT_UNORM) { + unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan])); + scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << bits) - 1)); + } else if (format == AC_FETCH_FORMAT_SNORM) { + unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan])); + scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << (bits - 1)) - 1)); + } + if (scale) + tmp = LLVMBuildFMul(ctx->builder, tmp, scale, ""); + + if (format == AC_FETCH_FORMAT_SNORM) { + /* Clamp to [-1, 1] */ + LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0); + LLVMValueRef clamp = LLVMBuildFCmp(ctx->builder, LLVMRealULT, tmp, neg_one, ""); + tmp = LLVMBuildSelect(ctx->builder, clamp, neg_one, tmp, ""); + } + + loads[chan] = ac_to_integer(ctx, tmp); + } + } + + while (num_channels < 4) { + if (format == AC_FETCH_FORMAT_UINT || format == AC_FETCH_FORMAT_SINT) { + loads[num_channels] = num_channels == 3 ? ctx->i32_1 : ctx->i32_0; + } else { + loads[num_channels] = ac_to_integer(ctx, num_channels == 3 ? 
ctx->f32_1 : ctx->f32_0); + } + num_channels++; + } + + if (reverse) { + tmp = loads[0]; + loads[0] = loads[2]; + loads[2] = tmp; + } + + return ac_build_gather_values(ctx, loads, 4); +} + +static void ac_build_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset, + LLVMValueRef soffset, LLVMValueRef immoffset, + unsigned num_channels, unsigned dfmt, unsigned nfmt, + unsigned cache_policy, bool structurized) +{ + voffset = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0, immoffset, ""); + + LLVMValueRef args[7]; + int idx = 0; + args[idx++] = vdata; + args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); + if (structurized) + args[idx++] = vindex ? vindex : ctx->i32_0; + args[idx++] = voffset ? voffset : ctx->i32_0; + args[idx++] = soffset ? soffset : ctx->i32_0; + args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0); + args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0); + unsigned func = + !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels; + const char *indexing_kind = structurized ? "struct" : "raw"; + char name[256], type_name[8]; + + LLVMTypeRef type = func > 1 ? 
LLVMVectorType(ctx->i32, func) : ctx->i32; + ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); + + snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s", indexing_kind, type_name); + + ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY); +} + +void ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset, + LLVMValueRef soffset, LLVMValueRef immoffset, + unsigned num_channels, unsigned dfmt, unsigned nfmt, + unsigned cache_policy) +{ + ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset, immoffset, num_channels, dfmt, + nfmt, cache_policy, true); +} + +void ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, + LLVMValueRef voffset, LLVMValueRef soffset, LLVMValueRef immoffset, + unsigned num_channels, unsigned dfmt, unsigned nfmt, + unsigned cache_policy) +{ + ac_build_tbuffer_store(ctx, rsrc, vdata, NULL, voffset, soffset, immoffset, num_channels, dfmt, + nfmt, cache_policy, false); +} + +void ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vdata, LLVMValueRef voffset, LLVMValueRef soffset, + unsigned cache_policy) +{ + vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, ""); + + if (LLVM_VERSION_MAJOR >= 9) { + /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. 
*/ + ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false, + false); + } else { + unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16; + unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; + + vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, ""); + + ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, ctx->i32_0, 1, dfmt, nfmt, + cache_policy); + } +} + +void ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, + LLVMValueRef voffset, LLVMValueRef soffset, unsigned cache_policy) +{ + vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, ""); + + if (LLVM_VERSION_MAJOR >= 9) { + /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ + ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false, + false); + } else { + unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8; + unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; + + vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, ""); + + ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, ctx->i32_0, 1, dfmt, nfmt, + cache_policy); + } } /** * Set range metadata on an instruction. This can only be used on load and @@ -1985,40 +1722,37 @@ * \p lo is the minimum value inclusive. * \p hi is the maximum value exclusive. 
*/ -static void set_range_metadata(struct ac_llvm_context *ctx, - LLVMValueRef value, unsigned lo, unsigned hi) +static void set_range_metadata(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned lo, + unsigned hi) { - LLVMValueRef range_md, md_args[2]; - LLVMTypeRef type = LLVMTypeOf(value); - LLVMContextRef context = LLVMGetTypeContext(type); - - md_args[0] = LLVMConstInt(type, lo, false); - md_args[1] = LLVMConstInt(type, hi, false); - range_md = LLVMMDNodeInContext(context, md_args, 2); - LLVMSetMetadata(value, ctx->range_md_kind, range_md); -} - -LLVMValueRef -ac_get_thread_id(struct ac_llvm_context *ctx) -{ - LLVMValueRef tid; - - LLVMValueRef tid_args[2]; - tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false); - tid_args[1] = ctx->i32_0; - tid_args[1] = ac_build_intrinsic(ctx, - "llvm.amdgcn.mbcnt.lo", ctx->i32, - tid_args, 2, AC_FUNC_ATTR_READNONE); - - if (ctx->wave_size == 32) { - tid = tid_args[1]; - } else { - tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", - ctx->i32, tid_args, - 2, AC_FUNC_ATTR_READNONE); - } - set_range_metadata(ctx, tid, 0, ctx->wave_size); - return tid; + LLVMValueRef range_md, md_args[2]; + LLVMTypeRef type = LLVMTypeOf(value); + LLVMContextRef context = LLVMGetTypeContext(type); + + md_args[0] = LLVMConstInt(type, lo, false); + md_args[1] = LLVMConstInt(type, hi, false); + range_md = LLVMMDNodeInContext(context, md_args, 2); + LLVMSetMetadata(value, ctx->range_md_kind, range_md); +} + +LLVMValueRef ac_get_thread_id(struct ac_llvm_context *ctx) +{ + LLVMValueRef tid; + + LLVMValueRef tid_args[2]; + tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false); + tid_args[1] = ctx->i32_0; + tid_args[1] = + ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32, tid_args, 2, AC_FUNC_ATTR_READNONE); + + if (ctx->wave_size == 32) { + tid = tid_args[1]; + } else { + tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32, tid_args, 2, + AC_FUNC_ATTR_READNONE); + } + set_range_metadata(ctx, tid, 0, ctx->wave_size); + 
return tid; } /* @@ -2045,1522 +1779,1407 @@ * Adding 1 yields the TID of the pixel to the right of the left pixel, and * adding 2 yields the TID of the pixel below the top pixel. */ -LLVMValueRef -ac_build_ddxy(struct ac_llvm_context *ctx, - uint32_t mask, - int idx, - LLVMValueRef val) -{ - unsigned tl_lanes[4], trbl_lanes[4]; - char name[32], type[8]; - LLVMValueRef tl, trbl; - LLVMTypeRef result_type; - LLVMValueRef result; - - result_type = ac_to_float_type(ctx, LLVMTypeOf(val)); - - if (result_type == ctx->f16) - val = LLVMBuildZExt(ctx->builder, val, ctx->i32, ""); - else if (result_type == ctx->v2f16) - val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, ""); - - for (unsigned i = 0; i < 4; ++i) { - tl_lanes[i] = i & mask; - trbl_lanes[i] = (i & mask) + idx; - } - - tl = ac_build_quad_swizzle(ctx, val, - tl_lanes[0], tl_lanes[1], - tl_lanes[2], tl_lanes[3]); - trbl = ac_build_quad_swizzle(ctx, val, - trbl_lanes[0], trbl_lanes[1], - trbl_lanes[2], trbl_lanes[3]); - - if (result_type == ctx->f16) { - tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, ""); - trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, ""); - } - - tl = LLVMBuildBitCast(ctx->builder, tl, result_type, ""); - trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, ""); - result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); - - ac_build_type_name_for_intr(result_type, type, sizeof(type)); - snprintf(name, sizeof(name), "llvm.amdgcn.wqm.%s", type); - - return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0); -} - -void -ac_build_sendmsg(struct ac_llvm_context *ctx, - uint32_t msg, - LLVMValueRef wave_id) -{ - LLVMValueRef args[2]; - args[0] = LLVMConstInt(ctx->i32, msg, false); - args[1] = wave_id; - ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, args, 2, 0); -} - -LLVMValueRef -ac_build_imsb(struct ac_llvm_context *ctx, - LLVMValueRef arg, - LLVMTypeRef dst_type) -{ - LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32", - dst_type, &arg, 1, - 
AC_FUNC_ATTR_READNONE); - - /* The HW returns the last bit index from MSB, but NIR/TGSI wants - * the index from LSB. Invert it by doing "31 - msb". */ - msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), - msb, ""); - - LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true); - LLVMValueRef cond = LLVMBuildOr(ctx->builder, - LLVMBuildICmp(ctx->builder, LLVMIntEQ, - arg, ctx->i32_0, ""), - LLVMBuildICmp(ctx->builder, LLVMIntEQ, - arg, all_ones, ""), ""); - - return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, ""); -} - -LLVMValueRef -ac_build_umsb(struct ac_llvm_context *ctx, - LLVMValueRef arg, - LLVMTypeRef dst_type) -{ - const char *intrin_name; - LLVMTypeRef type; - LLVMValueRef highest_bit; - LLVMValueRef zero; - unsigned bitsize; - - bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg)); - switch (bitsize) { - case 64: - intrin_name = "llvm.ctlz.i64"; - type = ctx->i64; - highest_bit = LLVMConstInt(ctx->i64, 63, false); - zero = ctx->i64_0; - break; - case 32: - intrin_name = "llvm.ctlz.i32"; - type = ctx->i32; - highest_bit = LLVMConstInt(ctx->i32, 31, false); - zero = ctx->i32_0; - break; - case 16: - intrin_name = "llvm.ctlz.i16"; - type = ctx->i16; - highest_bit = LLVMConstInt(ctx->i16, 15, false); - zero = ctx->i16_0; - break; - case 8: - intrin_name = "llvm.ctlz.i8"; - type = ctx->i8; - highest_bit = LLVMConstInt(ctx->i8, 7, false); - zero = ctx->i8_0; - break; - default: - unreachable(!"invalid bitsize"); - break; - } - - LLVMValueRef params[2] = { - arg, - ctx->i1true, - }; - - LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type, - params, 2, - AC_FUNC_ATTR_READNONE); - - /* The HW returns the last bit index from MSB, but TGSI/NIR wants - * the index from LSB. Invert it by doing "31 - msb". 
*/ - msb = LLVMBuildSub(ctx->builder, highest_bit, msb, ""); - - if (bitsize == 64) { - msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, ""); - } else if (bitsize < 32) { - msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, ""); - } - - /* check for zero */ - return LLVMBuildSelect(ctx->builder, - LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""), - LLVMConstInt(ctx->i32, -1, true), msb, ""); -} - -LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, - LLVMValueRef b) -{ - char name[64], type[64]; - - ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type)); - snprintf(name, sizeof(name), "llvm.minnum.%s", type); - LLVMValueRef args[2] = {a, b}; - return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, - AC_FUNC_ATTR_READNONE); -} - -LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, - LLVMValueRef b) -{ - char name[64], type[64]; - - ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type)); - snprintf(name, sizeof(name), "llvm.maxnum.%s", type); - LLVMValueRef args[2] = {a, b}; - return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, - AC_FUNC_ATTR_READNONE); -} - -LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, - LLVMValueRef b) -{ - LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, ""); - return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); -} - -LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, - LLVMValueRef b) -{ - LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, ""); - return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); -} - -LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, - LLVMValueRef b) -{ - LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, ""); - return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); +LLVMValueRef ac_build_ddxy(struct ac_llvm_context *ctx, uint32_t mask, int idx, LLVMValueRef val) +{ + unsigned tl_lanes[4], trbl_lanes[4]; + char name[32], type[8]; + 
LLVMValueRef tl, trbl; + LLVMTypeRef result_type; + LLVMValueRef result; + + result_type = ac_to_float_type(ctx, LLVMTypeOf(val)); + + if (result_type == ctx->f16) + val = LLVMBuildZExt(ctx->builder, val, ctx->i32, ""); + else if (result_type == ctx->v2f16) + val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, ""); + + for (unsigned i = 0; i < 4; ++i) { + tl_lanes[i] = i & mask; + trbl_lanes[i] = (i & mask) + idx; + } + + tl = ac_build_quad_swizzle(ctx, val, tl_lanes[0], tl_lanes[1], tl_lanes[2], tl_lanes[3]); + trbl = + ac_build_quad_swizzle(ctx, val, trbl_lanes[0], trbl_lanes[1], trbl_lanes[2], trbl_lanes[3]); + + if (result_type == ctx->f16) { + tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, ""); + trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, ""); + } + + tl = LLVMBuildBitCast(ctx->builder, tl, result_type, ""); + trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, ""); + result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); + + ac_build_type_name_for_intr(result_type, type, sizeof(type)); + snprintf(name, sizeof(name), "llvm.amdgcn.wqm.%s", type); + + return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0); } -LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, - LLVMValueRef b) +void ac_build_sendmsg(struct ac_llvm_context *ctx, uint32_t msg, LLVMValueRef wave_id) { - LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, ""); - return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); + LLVMValueRef args[2]; + args[0] = LLVMConstInt(ctx->i32, msg, false); + args[1] = wave_id; + ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, args, 2, 0); +} + +LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type) +{ + LLVMValueRef msb = + ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32", dst_type, &arg, 1, AC_FUNC_ATTR_READNONE); + + /* The HW returns the last bit index from MSB, but NIR/TGSI wants + * the index from LSB. Invert it by doing "31 - msb". 
*/ + msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), msb, ""); + + LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true); + LLVMValueRef cond = + LLVMBuildOr(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, ctx->i32_0, ""), + LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, all_ones, ""), ""); + + return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, ""); +} + +LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type) +{ + const char *intrin_name; + LLVMTypeRef type; + LLVMValueRef highest_bit; + LLVMValueRef zero; + unsigned bitsize; + + bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg)); + switch (bitsize) { + case 64: + intrin_name = "llvm.ctlz.i64"; + type = ctx->i64; + highest_bit = LLVMConstInt(ctx->i64, 63, false); + zero = ctx->i64_0; + break; + case 32: + intrin_name = "llvm.ctlz.i32"; + type = ctx->i32; + highest_bit = LLVMConstInt(ctx->i32, 31, false); + zero = ctx->i32_0; + break; + case 16: + intrin_name = "llvm.ctlz.i16"; + type = ctx->i16; + highest_bit = LLVMConstInt(ctx->i16, 15, false); + zero = ctx->i16_0; + break; + case 8: + intrin_name = "llvm.ctlz.i8"; + type = ctx->i8; + highest_bit = LLVMConstInt(ctx->i8, 7, false); + zero = ctx->i8_0; + break; + default: + unreachable(!"invalid bitsize"); + break; + } + + LLVMValueRef params[2] = { + arg, + ctx->i1true, + }; + + LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type, params, 2, AC_FUNC_ATTR_READNONE); + + /* The HW returns the last bit index from MSB, but TGSI/NIR wants + * the index from LSB. Invert it by doing "31 - msb". 
*/ + msb = LLVMBuildSub(ctx->builder, highest_bit, msb, ""); + + if (bitsize == 64) { + msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, ""); + } else if (bitsize < 32) { + msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, ""); + } + + /* check for zero */ + return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""), + LLVMConstInt(ctx->i32, -1, true), msb, ""); +} + +LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + char name[64], type[64]; + + ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type)); + snprintf(name, sizeof(name), "llvm.minnum.%s", type); + LLVMValueRef args[2] = {a, b}; + return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, AC_FUNC_ATTR_READNONE); +} + +LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + char name[64], type[64]; + + ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type)); + snprintf(name, sizeof(name), "llvm.maxnum.%s", type); + LLVMValueRef args[2] = {a, b}; + return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, AC_FUNC_ATTR_READNONE); +} + +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, ""); + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); +} + +LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, ""); + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); +} + +LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, ""); + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); +} + +LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, ""); + return 
LLVMBuildSelect(ctx->builder, cmp, a, b, ""); } LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value) { - LLVMTypeRef t = LLVMTypeOf(value); - return ac_build_fmin(ctx, ac_build_fmax(ctx, value, LLVMConstReal(t, 0.0)), - LLVMConstReal(t, 1.0)); + LLVMTypeRef t = LLVMTypeOf(value); + return ac_build_fmin(ctx, ac_build_fmax(ctx, value, LLVMConstReal(t, 0.0)), + LLVMConstReal(t, 1.0)); } void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a) { - LLVMValueRef args[9]; + LLVMValueRef args[9]; + + args[0] = LLVMConstInt(ctx->i32, a->target, 0); + args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0); - args[0] = LLVMConstInt(ctx->i32, a->target, 0); - args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0); + if (a->compr) { + args[2] = LLVMBuildBitCast(ctx->builder, a->out[0], ctx->v2i16, ""); + args[3] = LLVMBuildBitCast(ctx->builder, a->out[1], ctx->v2i16, ""); + args[4] = LLVMConstInt(ctx->i1, a->done, 0); + args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0); + + ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16", ctx->voidt, args, 6, 0); + } else { + args[2] = a->out[0]; + args[3] = a->out[1]; + args[4] = a->out[2]; + args[5] = a->out[3]; + args[6] = LLVMConstInt(ctx->i1, a->done, 0); + args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0); - if (a->compr) { - args[2] = LLVMBuildBitCast(ctx->builder, a->out[0], - ctx->v2i16, ""); - args[3] = LLVMBuildBitCast(ctx->builder, a->out[1], - ctx->v2i16, ""); - args[4] = LLVMConstInt(ctx->i1, a->done, 0); - args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0); - - ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16", - ctx->voidt, args, 6, 0); - } else { - args[2] = a->out[0]; - args[3] = a->out[1]; - args[4] = a->out[2]; - args[5] = a->out[3]; - args[6] = LLVMConstInt(ctx->i1, a->done, 0); - args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0); - - ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", - ctx->voidt, args, 8, 0); - } + ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", 
ctx->voidt, args, 8, 0); + } } void ac_build_export_null(struct ac_llvm_context *ctx) { - struct ac_export_args args; + struct ac_export_args args; - args.enabled_channels = 0x0; /* enabled channels */ - args.valid_mask = 1; /* whether the EXEC mask is valid */ - args.done = 1; /* DONE bit */ - args.target = V_008DFC_SQ_EXP_NULL; - args.compr = 0; /* COMPR flag (0 = 32-bit export) */ - args.out[0] = LLVMGetUndef(ctx->f32); /* R */ - args.out[1] = LLVMGetUndef(ctx->f32); /* G */ - args.out[2] = LLVMGetUndef(ctx->f32); /* B */ - args.out[3] = LLVMGetUndef(ctx->f32); /* A */ + args.enabled_channels = 0x0; /* enabled channels */ + args.valid_mask = 1; /* whether the EXEC mask is valid */ + args.done = 1; /* DONE bit */ + args.target = V_008DFC_SQ_EXP_NULL; + args.compr = 0; /* COMPR flag (0 = 32-bit export) */ + args.out[0] = LLVMGetUndef(ctx->f32); /* R */ + args.out[1] = LLVMGetUndef(ctx->f32); /* G */ + args.out[2] = LLVMGetUndef(ctx->f32); /* B */ + args.out[3] = LLVMGetUndef(ctx->f32); /* A */ - ac_build_export(ctx, &args); + ac_build_export(ctx, &args); } static unsigned ac_num_coords(enum ac_image_dim dim) { - switch (dim) { - case ac_image_1d: - return 1; - case ac_image_2d: - case ac_image_1darray: - return 2; - case ac_image_3d: - case ac_image_cube: - case ac_image_2darray: - case ac_image_2dmsaa: - return 3; - case ac_image_2darraymsaa: - return 4; - default: - unreachable("ac_num_coords: bad dim"); - } + switch (dim) { + case ac_image_1d: + return 1; + case ac_image_2d: + case ac_image_1darray: + return 2; + case ac_image_3d: + case ac_image_cube: + case ac_image_2darray: + case ac_image_2dmsaa: + return 3; + case ac_image_2darraymsaa: + return 4; + default: + unreachable("ac_num_coords: bad dim"); + } } static unsigned ac_num_derivs(enum ac_image_dim dim) { - switch (dim) { - case ac_image_1d: - case ac_image_1darray: - return 2; - case ac_image_2d: - case ac_image_2darray: - case ac_image_cube: - return 4; - case ac_image_3d: - return 6; - case 
ac_image_2dmsaa: - case ac_image_2darraymsaa: - default: - unreachable("derivatives not supported"); - } + switch (dim) { + case ac_image_1d: + case ac_image_1darray: + return 2; + case ac_image_2d: + case ac_image_2darray: + case ac_image_cube: + return 4; + case ac_image_3d: + return 6; + case ac_image_2dmsaa: + case ac_image_2darraymsaa: + default: + unreachable("derivatives not supported"); + } } static const char *get_atomic_name(enum ac_atomic_op op) { - switch (op) { - case ac_atomic_swap: return "swap"; - case ac_atomic_add: return "add"; - case ac_atomic_sub: return "sub"; - case ac_atomic_smin: return "smin"; - case ac_atomic_umin: return "umin"; - case ac_atomic_smax: return "smax"; - case ac_atomic_umax: return "umax"; - case ac_atomic_and: return "and"; - case ac_atomic_or: return "or"; - case ac_atomic_xor: return "xor"; - case ac_atomic_inc_wrap: return "inc"; - case ac_atomic_dec_wrap: return "dec"; - } - unreachable("bad atomic op"); -} - -LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, - struct ac_image_args *a) -{ - const char *overload[3] = { "", "", "" }; - unsigned num_overloads = 0; - LLVMValueRef args[18]; - unsigned num_args = 0; - enum ac_image_dim dim = a->dim; - - assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 || - !a->level_zero); - assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip && - a->opcode != ac_image_store_mip) || - a->lod); - assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || - (!a->compare && !a->offset)); - assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || - a->opcode == ac_image_get_lod) || - !a->bias); - assert((a->bias ? 1 : 0) + - (a->lod ? 1 : 0) + - (a->level_zero ? 1 : 0) + - (a->derivs[0] ? 1 : 0) <= 1); - assert((a->min_lod ? 1 : 0) + - (a->lod ? 1 : 0) + - (a->level_zero ? 
1 : 0) <= 1); - assert(!a->d16 || (ctx->chip_class >= GFX8 && - a->opcode != ac_image_atomic && - a->opcode != ac_image_atomic_cmpswap && - a->opcode != ac_image_get_lod && - a->opcode != ac_image_get_resinfo)); - - if (a->opcode == ac_image_get_lod) { - switch (dim) { - case ac_image_1darray: - dim = ac_image_1d; - break; - case ac_image_2darray: - case ac_image_cube: - dim = ac_image_2d; - break; - default: - break; - } - } - - bool sample = a->opcode == ac_image_sample || - a->opcode == ac_image_gather4 || - a->opcode == ac_image_get_lod; - bool atomic = a->opcode == ac_image_atomic || - a->opcode == ac_image_atomic_cmpswap; - bool load = a->opcode == ac_image_sample || - a->opcode == ac_image_gather4 || - a->opcode == ac_image_load || - a->opcode == ac_image_load_mip; - LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32; - - if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) { - args[num_args++] = a->data[0]; - if (a->opcode == ac_image_atomic_cmpswap) - args[num_args++] = a->data[1]; - } - - if (!atomic) - args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, false); - - if (a->offset) - args[num_args++] = ac_to_integer(ctx, a->offset); - if (a->bias) { - args[num_args++] = ac_to_float(ctx, a->bias); - overload[num_overloads++] = ".f32"; - } - if (a->compare) - args[num_args++] = ac_to_float(ctx, a->compare); - if (a->derivs[0]) { - unsigned count = ac_num_derivs(dim); - for (unsigned i = 0; i < count; ++i) - args[num_args++] = ac_to_float(ctx, a->derivs[i]); - overload[num_overloads++] = ".f32"; - } - unsigned num_coords = - a->opcode != ac_image_get_resinfo ? 
ac_num_coords(dim) : 0; - for (unsigned i = 0; i < num_coords; ++i) - args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, ""); - if (a->lod) - args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, ""); - if (a->min_lod) - args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, ""); - - overload[num_overloads++] = sample ? ".f32" : ".i32"; - - args[num_args++] = a->resource; - if (sample) { - args[num_args++] = a->sampler; - args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false); - } - - args[num_args++] = ctx->i32_0; /* texfailctrl */ - args[num_args++] = LLVMConstInt(ctx->i32, - load ? get_load_cache_policy(ctx, a->cache_policy) : - a->cache_policy, false); - - const char *name; - const char *atomic_subop = ""; - switch (a->opcode) { - case ac_image_sample: name = "sample"; break; - case ac_image_gather4: name = "gather4"; break; - case ac_image_load: name = "load"; break; - case ac_image_load_mip: name = "load.mip"; break; - case ac_image_store: name = "store"; break; - case ac_image_store_mip: name = "store.mip"; break; - case ac_image_atomic: - name = "atomic."; - atomic_subop = get_atomic_name(a->atomic); - break; - case ac_image_atomic_cmpswap: - name = "atomic."; - atomic_subop = "cmpswap"; - break; - case ac_image_get_lod: name = "getlod"; break; - case ac_image_get_resinfo: name = "getresinfo"; break; - default: unreachable("invalid image opcode"); - } - - const char *dimname; - switch (dim) { - case ac_image_1d: dimname = "1d"; break; - case ac_image_2d: dimname = "2d"; break; - case ac_image_3d: dimname = "3d"; break; - case ac_image_cube: dimname = "cube"; break; - case ac_image_1darray: dimname = "1darray"; break; - case ac_image_2darray: dimname = "2darray"; break; - case ac_image_2dmsaa: dimname = "2dmsaa"; break; - case ac_image_2darraymsaa: dimname = "2darraymsaa"; break; - default: unreachable("invalid dim"); - } - - bool lod_suffix = - a->lod && (a->opcode == ac_image_sample || 
a->opcode == ac_image_gather4); - char intr_name[96]; - snprintf(intr_name, sizeof(intr_name), - "llvm.amdgcn.image.%s%s" /* base name */ - "%s%s%s%s" /* sample/gather modifiers */ - ".%s.%s%s%s%s", /* dimension and type overloads */ - name, atomic_subop, - a->compare ? ".c" : "", - a->bias ? ".b" : - lod_suffix ? ".l" : - a->derivs[0] ? ".d" : - a->level_zero ? ".lz" : "", - a->min_lod ? ".cl" : "", - a->offset ? ".o" : "", - dimname, - atomic ? "i32" : (a->d16 ? "v4f16" : "v4f32"), - overload[0], overload[1], overload[2]); - - LLVMTypeRef retty; - if (atomic) - retty = ctx->i32; - else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip) - retty = ctx->voidt; - else - retty = a->d16 ? ctx->v4f16 : ctx->v4f32; - - LLVMValueRef result = - ac_build_intrinsic(ctx, intr_name, retty, args, num_args, - a->attributes); - if (!sample && !atomic && retty != ctx->voidt) - result = ac_to_integer(ctx, result); - - return result; -} - -LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx, - LLVMValueRef rsrc) -{ - LLVMValueRef samples; - - /* Read the samples from the descriptor directly. - * Hardware doesn't have any instruction for this. 
- */ - samples = LLVMBuildExtractElement(ctx->builder, rsrc, - LLVMConstInt(ctx->i32, 3, 0), ""); - samples = LLVMBuildLShr(ctx->builder, samples, - LLVMConstInt(ctx->i32, 16, 0), ""); - samples = LLVMBuildAnd(ctx->builder, samples, - LLVMConstInt(ctx->i32, 0xf, 0), ""); - samples = LLVMBuildShl(ctx->builder, ctx->i32_1, - samples, ""); - return samples; -} - -LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, - LLVMValueRef args[2]) -{ - return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", ctx->v2f16, - args, 2, AC_FUNC_ATTR_READNONE); -} - -LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, - LLVMValueRef args[2]) -{ - LLVMValueRef res = - ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16", - ctx->v2i16, args, 2, - AC_FUNC_ATTR_READNONE); - return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); -} - -LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, - LLVMValueRef args[2]) -{ - LLVMValueRef res = - ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16", - ctx->v2i16, args, 2, - AC_FUNC_ATTR_READNONE); - return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); + switch (op) { + case ac_atomic_swap: + return "swap"; + case ac_atomic_add: + return "add"; + case ac_atomic_sub: + return "sub"; + case ac_atomic_smin: + return "smin"; + case ac_atomic_umin: + return "umin"; + case ac_atomic_smax: + return "smax"; + case ac_atomic_umax: + return "umax"; + case ac_atomic_and: + return "and"; + case ac_atomic_or: + return "or"; + case ac_atomic_xor: + return "xor"; + case ac_atomic_inc_wrap: + return "inc"; + case ac_atomic_dec_wrap: + return "dec"; + } + unreachable("bad atomic op"); +} + +LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a) +{ + const char *overload[3] = {"", "", ""}; + unsigned num_overloads = 0; + LLVMValueRef args[18]; + unsigned num_args = 0; + enum ac_image_dim dim = a->dim; + + assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 || !a->level_zero); + 
assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip && + a->opcode != ac_image_store_mip) || + a->lod); + assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || + (!a->compare && !a->offset)); + assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || + a->opcode == ac_image_get_lod) || + !a->bias); + assert((a->bias ? 1 : 0) + (a->lod ? 1 : 0) + (a->level_zero ? 1 : 0) + (a->derivs[0] ? 1 : 0) <= + 1); + assert((a->min_lod ? 1 : 0) + (a->lod ? 1 : 0) + (a->level_zero ? 1 : 0) <= 1); + assert(!a->d16 || (ctx->chip_class >= GFX8 && a->opcode != ac_image_atomic && + a->opcode != ac_image_atomic_cmpswap && a->opcode != ac_image_get_lod && + a->opcode != ac_image_get_resinfo)); + + if (a->opcode == ac_image_get_lod) { + switch (dim) { + case ac_image_1darray: + dim = ac_image_1d; + break; + case ac_image_2darray: + case ac_image_cube: + dim = ac_image_2d; + break; + default: + break; + } + } + + bool sample = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || + a->opcode == ac_image_get_lod; + bool atomic = a->opcode == ac_image_atomic || a->opcode == ac_image_atomic_cmpswap; + bool load = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || + a->opcode == ac_image_load || a->opcode == ac_image_load_mip; + LLVMTypeRef coord_type = sample ? 
ctx->f32 : ctx->i32; + + if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) { + args[num_args++] = a->data[0]; + if (a->opcode == ac_image_atomic_cmpswap) + args[num_args++] = a->data[1]; + } + + if (!atomic) + args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, false); + + if (a->offset) + args[num_args++] = ac_to_integer(ctx, a->offset); + if (a->bias) { + args[num_args++] = ac_to_float(ctx, a->bias); + overload[num_overloads++] = ".f32"; + } + if (a->compare) + args[num_args++] = ac_to_float(ctx, a->compare); + if (a->derivs[0]) { + unsigned count = ac_num_derivs(dim); + for (unsigned i = 0; i < count; ++i) + args[num_args++] = ac_to_float(ctx, a->derivs[i]); + overload[num_overloads++] = ".f32"; + } + unsigned num_coords = a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0; + for (unsigned i = 0; i < num_coords; ++i) + args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, ""); + if (a->lod) + args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, ""); + if (a->min_lod) + args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, ""); + + overload[num_overloads++] = sample ? ".f32" : ".i32"; + + args[num_args++] = a->resource; + if (sample) { + args[num_args++] = a->sampler; + args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false); + } + + args[num_args++] = ctx->i32_0; /* texfailctrl */ + args[num_args++] = LLVMConstInt( + ctx->i32, load ? 
get_load_cache_policy(ctx, a->cache_policy) : a->cache_policy, false); + + const char *name; + const char *atomic_subop = ""; + switch (a->opcode) { + case ac_image_sample: + name = "sample"; + break; + case ac_image_gather4: + name = "gather4"; + break; + case ac_image_load: + name = "load"; + break; + case ac_image_load_mip: + name = "load.mip"; + break; + case ac_image_store: + name = "store"; + break; + case ac_image_store_mip: + name = "store.mip"; + break; + case ac_image_atomic: + name = "atomic."; + atomic_subop = get_atomic_name(a->atomic); + break; + case ac_image_atomic_cmpswap: + name = "atomic."; + atomic_subop = "cmpswap"; + break; + case ac_image_get_lod: + name = "getlod"; + break; + case ac_image_get_resinfo: + name = "getresinfo"; + break; + default: + unreachable("invalid image opcode"); + } + + const char *dimname; + switch (dim) { + case ac_image_1d: + dimname = "1d"; + break; + case ac_image_2d: + dimname = "2d"; + break; + case ac_image_3d: + dimname = "3d"; + break; + case ac_image_cube: + dimname = "cube"; + break; + case ac_image_1darray: + dimname = "1darray"; + break; + case ac_image_2darray: + dimname = "2darray"; + break; + case ac_image_2dmsaa: + dimname = "2dmsaa"; + break; + case ac_image_2darraymsaa: + dimname = "2darraymsaa"; + break; + default: + unreachable("invalid dim"); + } + + bool lod_suffix = a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4); + char intr_name[96]; + snprintf(intr_name, sizeof(intr_name), + "llvm.amdgcn.image.%s%s" /* base name */ + "%s%s%s%s" /* sample/gather modifiers */ + ".%s.%s%s%s%s", /* dimension and type overloads */ + name, atomic_subop, a->compare ? ".c" : "", + a->bias ? ".b" : lod_suffix ? ".l" : a->derivs[0] ? ".d" : a->level_zero ? ".lz" : "", + a->min_lod ? ".cl" : "", a->offset ? ".o" : "", dimname, + atomic ? "i32" : (a->d16 ? 
"v4f16" : "v4f32"), overload[0], overload[1], overload[2]); + + LLVMTypeRef retty; + if (atomic) + retty = ctx->i32; + else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip) + retty = ctx->voidt; + else + retty = a->d16 ? ctx->v4f16 : ctx->v4f32; + + LLVMValueRef result = ac_build_intrinsic(ctx, intr_name, retty, args, num_args, a->attributes); + if (!sample && !atomic && retty != ctx->voidt) + result = ac_to_integer(ctx, result); + + return result; +} + +LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx, LLVMValueRef rsrc) +{ + LLVMValueRef samples; + + /* Read the samples from the descriptor directly. + * Hardware doesn't have any instruction for this. + */ + samples = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 3, 0), ""); + samples = LLVMBuildLShr(ctx->builder, samples, LLVMConstInt(ctx->i32, 16, 0), ""); + samples = LLVMBuildAnd(ctx->builder, samples, LLVMConstInt(ctx->i32, 0xf, 0), ""); + samples = LLVMBuildShl(ctx->builder, ctx->i32_1, samples, ""); + return samples; +} + +LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]) +{ + return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", ctx->v2f16, args, 2, + AC_FUNC_ATTR_READNONE); +} + +LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2]) +{ + LLVMValueRef res = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16", ctx->v2i16, args, 2, + AC_FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); +} + +LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2]) +{ + LLVMValueRef res = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16", ctx->v2i16, args, 2, + AC_FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); } /* The 8-bit and 10-bit clamping is for HW workarounds. 
*/ -LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, - LLVMValueRef args[2], unsigned bits, bool hi) +LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, + bool hi) { - assert(bits == 8 || bits == 10 || bits == 16); + assert(bits == 8 || bits == 10 || bits == 16); - LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, - bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0); - LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, - bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0); - LLVMValueRef max_alpha = - bits != 10 ? max_rgb : ctx->i32_1; - LLVMValueRef min_alpha = - bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0); - - /* Clamp. */ - if (bits != 16) { - for (int i = 0; i < 2; i++) { - bool alpha = hi && i == 1; - args[i] = ac_build_imin(ctx, args[i], - alpha ? max_alpha : max_rgb); - args[i] = ac_build_imax(ctx, args[i], - alpha ? min_alpha : min_rgb); - } - } - - LLVMValueRef res = - ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", - ctx->v2i16, args, 2, - AC_FUNC_ATTR_READNONE); - return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0); + LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0); + LLVMValueRef max_alpha = bits != 10 ? max_rgb : ctx->i32_1; + LLVMValueRef min_alpha = bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0); + + /* Clamp. */ + if (bits != 16) { + for (int i = 0; i < 2; i++) { + bool alpha = hi && i == 1; + args[i] = ac_build_imin(ctx, args[i], alpha ? max_alpha : max_rgb); + args[i] = ac_build_imax(ctx, args[i], alpha ? min_alpha : min_rgb); + } + } + + LLVMValueRef res = + ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", ctx->v2i16, args, 2, AC_FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); } /* The 8-bit and 10-bit clamping is for HW workarounds. 
*/ -LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, - LLVMValueRef args[2], unsigned bits, bool hi) +LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, + bool hi) { - assert(bits == 8 || bits == 10 || bits == 16); + assert(bits == 8 || bits == 10 || bits == 16); + + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0); + LLVMValueRef max_alpha = bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0); - LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, - bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0); - LLVMValueRef max_alpha = - bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0); - - /* Clamp. */ - if (bits != 16) { - for (int i = 0; i < 2; i++) { - bool alpha = hi && i == 1; - args[i] = ac_build_umin(ctx, args[i], - alpha ? max_alpha : max_rgb); - } - } - - LLVMValueRef res = - ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", - ctx->v2i16, args, 2, - AC_FUNC_ATTR_READNONE); - return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); + /* Clamp. */ + if (bits != 16) { + for (int i = 0; i < 2; i++) { + bool alpha = hi && i == 1; + args[i] = ac_build_umin(ctx, args[i], alpha ? 
max_alpha : max_rgb); + } + } + + LLVMValueRef res = + ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", ctx->v2i16, args, 2, AC_FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); } LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1) { - return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, - &i1, 1, AC_FUNC_ATTR_READNONE); + return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, &i1, 1, AC_FUNC_ATTR_READNONE); } void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1) { - ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, - &i1, 1, 0); + ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, &i1, 1, 0); } -LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, - LLVMValueRef offset, LLVMValueRef width, - bool is_signed) +LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset, + LLVMValueRef width, bool is_signed) { - LLVMValueRef args[] = { - input, - offset, - width, - }; - - return ac_build_intrinsic(ctx, is_signed ? "llvm.amdgcn.sbfe.i32" : - "llvm.amdgcn.ubfe.i32", - ctx->i32, args, 3, AC_FUNC_ATTR_READNONE); + LLVMValueRef args[] = { + input, + offset, + width, + }; + return ac_build_intrinsic(ctx, is_signed ? 
"llvm.amdgcn.sbfe.i32" : "llvm.amdgcn.ubfe.i32", + ctx->i32, args, 3, AC_FUNC_ATTR_READNONE); } -LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, - LLVMValueRef s1, LLVMValueRef s2) +LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1, + LLVMValueRef s2) { - return LLVMBuildAdd(ctx->builder, - LLVMBuildMul(ctx->builder, s0, s1, ""), s2, ""); + return LLVMBuildAdd(ctx->builder, LLVMBuildMul(ctx->builder, s0, s1, ""), s2, ""); } -LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, - LLVMValueRef s1, LLVMValueRef s2) +LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1, + LLVMValueRef s2) { - /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */ - if (ctx->chip_class >= GFX10) { - return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32, - (LLVMValueRef []) {s0, s1, s2}, 3, - AC_FUNC_ATTR_READNONE); - } + /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */ + if (ctx->chip_class >= GFX10) { + return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32, (LLVMValueRef[]){s0, s1, s2}, 3, + AC_FUNC_ATTR_READNONE); + } - return LLVMBuildFAdd(ctx->builder, - LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, ""); + return LLVMBuildFAdd(ctx->builder, LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, ""); } void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags) { - if (!wait_flags) - return; + if (!wait_flags) + return; - unsigned lgkmcnt = 63; - unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15; - unsigned vscnt = 63; - - if (wait_flags & AC_WAIT_LGKM) - lgkmcnt = 0; - if (wait_flags & AC_WAIT_VLOAD) - vmcnt = 0; - - if (wait_flags & AC_WAIT_VSTORE) { - if (ctx->chip_class >= GFX10) - vscnt = 0; - else - vmcnt = 0; - } - - /* There is no intrinsic for vscnt(0), so use a fence. 
*/ - if ((wait_flags & AC_WAIT_LGKM && - wait_flags & AC_WAIT_VLOAD && - wait_flags & AC_WAIT_VSTORE) || - vscnt == 0) { - LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, ""); - return; - } - - unsigned simm16 = (lgkmcnt << 8) | - (7 << 4) | /* expcnt */ - (vmcnt & 0xf) | - ((vmcnt >> 4) << 14); - - LLVMValueRef args[1] = { - LLVMConstInt(ctx->i32, simm16, false), - }; - ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", - ctx->voidt, args, 1, 0); -} - -LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0, - LLVMValueRef src1, LLVMValueRef src2, - unsigned bitsize) -{ - LLVMValueRef result; - - if (bitsize == 64 || (bitsize == 16 && ctx->chip_class <= GFX8)) { - /* Lower 64-bit fmed because LLVM doesn't expose an intrinsic, - * or lower 16-bit fmed because it's only supported on GFX9+. - */ - LLVMValueRef min1, min2, max1; - - min1 = ac_build_fmin(ctx, src0, src1); - max1 = ac_build_fmax(ctx, src0, src1); - min2 = ac_build_fmin(ctx, max1, src2); - - result = ac_build_fmax(ctx, min2, min1); - } else { - LLVMTypeRef type; - char *intr; - - if (bitsize == 16) { - intr = "llvm.amdgcn.fmed3.f16"; - type = ctx->f16; - } else { - assert(bitsize == 32); - intr = "llvm.amdgcn.fmed3.f32"; - type = ctx->f32; - } - - LLVMValueRef params[] = { - src0, - src1, - src2, - }; - - result = ac_build_intrinsic(ctx, intr, type, params, 3, - AC_FUNC_ATTR_READNONE); - } - - if (ctx->chip_class < GFX9 && bitsize == 32) { - /* Only pre-GFX9 chips do not flush denorms. 
*/ - result = ac_build_canonicalize(ctx, result, bitsize); - } - - return result; -} - -LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize) -{ - LLVMTypeRef type; - char *intr; - - if (bitsize == 16) { - intr = "llvm.amdgcn.fract.f16"; - type = ctx->f16; - } else if (bitsize == 32) { - intr = "llvm.amdgcn.fract.f32"; - type = ctx->f32; - } else { - intr = "llvm.amdgcn.fract.f64"; - type = ctx->f64; - } - - LLVMValueRef params[] = { - src0, - }; - return ac_build_intrinsic(ctx, intr, type, params, 1, - AC_FUNC_ATTR_READNONE); -} - -LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize) -{ - LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, bitsize); - LLVMValueRef zero = LLVMConstInt(type, 0, false); - LLVMValueRef one = LLVMConstInt(type, 1, false); - - LLVMValueRef cmp, val; - cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, ""); - val = LLVMBuildSelect(ctx->builder, cmp, one, src0, ""); - cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, ""); - val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(type, -1, true), ""); - return val; -} - -LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize) -{ - LLVMValueRef cmp, val, zero, one; - LLVMTypeRef type; - - if (bitsize == 16) { - type = ctx->f16; - zero = ctx->f16_0; - one = ctx->f16_1; - } else if (bitsize == 32) { - type = ctx->f32; - zero = ctx->f32_0; - one = ctx->f32_1; - } else { - type = ctx->f64; - zero = ctx->f64_0; - one = ctx->f64_1; - } - - cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, ""); - val = LLVMBuildSelect(ctx->builder, cmp, one, src0, ""); - cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, ""); - val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(type, -1.0), ""); - return val; + unsigned lgkmcnt = 63; + unsigned vmcnt = ctx->chip_class >= GFX9 ? 
63 : 15; + unsigned vscnt = 63; + + if (wait_flags & AC_WAIT_LGKM) + lgkmcnt = 0; + if (wait_flags & AC_WAIT_VLOAD) + vmcnt = 0; + + if (wait_flags & AC_WAIT_VSTORE) { + if (ctx->chip_class >= GFX10) + vscnt = 0; + else + vmcnt = 0; + } + + /* There is no intrinsic for vscnt(0), so use a fence. */ + if ((wait_flags & AC_WAIT_LGKM && wait_flags & AC_WAIT_VLOAD && wait_flags & AC_WAIT_VSTORE) || + vscnt == 0) { + LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, ""); + return; + } + + unsigned simm16 = (lgkmcnt << 8) | (7 << 4) | /* expcnt */ + (vmcnt & 0xf) | ((vmcnt >> 4) << 14); + + LLVMValueRef args[1] = { + LLVMConstInt(ctx->i32, simm16, false), + }; + ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0); +} + +LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1, + LLVMValueRef src2, unsigned bitsize) +{ + LLVMValueRef result; + + if (bitsize == 64 || (bitsize == 16 && ctx->chip_class <= GFX8)) { + /* Lower 64-bit fmed because LLVM doesn't expose an intrinsic, + * or lower 16-bit fmed because it's only supported on GFX9+. + */ + LLVMValueRef min1, min2, max1; + + min1 = ac_build_fmin(ctx, src0, src1); + max1 = ac_build_fmax(ctx, src0, src1); + min2 = ac_build_fmin(ctx, max1, src2); + + result = ac_build_fmax(ctx, min2, min1); + } else { + LLVMTypeRef type; + char *intr; + + if (bitsize == 16) { + intr = "llvm.amdgcn.fmed3.f16"; + type = ctx->f16; + } else { + assert(bitsize == 32); + intr = "llvm.amdgcn.fmed3.f32"; + type = ctx->f32; + } + + LLVMValueRef params[] = { + src0, + src1, + src2, + }; + + result = ac_build_intrinsic(ctx, intr, type, params, 3, AC_FUNC_ATTR_READNONE); + } + + if (ctx->chip_class < GFX9 && bitsize == 32) { + /* Only pre-GFX9 chips do not flush denorms. 
*/ + result = ac_build_canonicalize(ctx, result, bitsize); + } + + return result; +} + +LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) +{ + LLVMTypeRef type; + char *intr; + + if (bitsize == 16) { + intr = "llvm.amdgcn.fract.f16"; + type = ctx->f16; + } else if (bitsize == 32) { + intr = "llvm.amdgcn.fract.f32"; + type = ctx->f32; + } else { + intr = "llvm.amdgcn.fract.f64"; + type = ctx->f64; + } + + LLVMValueRef params[] = { + src0, + }; + return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE); +} + +LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) +{ + LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, bitsize); + LLVMValueRef zero = LLVMConstInt(type, 0, false); + LLVMValueRef one = LLVMConstInt(type, 1, false); + + LLVMValueRef cmp, val; + cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, ""); + val = LLVMBuildSelect(ctx->builder, cmp, one, src0, ""); + cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, ""); + val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(type, -1, true), ""); + return val; +} + +LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) +{ + LLVMValueRef cmp, val, zero, one; + LLVMTypeRef type; + + if (bitsize == 16) { + type = ctx->f16; + zero = ctx->f16_0; + one = ctx->f16_1; + } else if (bitsize == 32) { + type = ctx->f32; + zero = ctx->f32_0; + one = ctx->f32_1; + } else { + type = ctx->f64; + zero = ctx->f64_0; + one = ctx->f64_1; + } + + cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, ""); + val = LLVMBuildSelect(ctx->builder, cmp, one, src0, ""); + cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, ""); + val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(type, -1.0), ""); + return val; } LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0) { - LLVMValueRef result; - unsigned bitsize; + LLVMValueRef result; 
+ unsigned bitsize; - bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); + bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); - switch (bitsize) { - case 128: - result = ac_build_intrinsic(ctx, "llvm.ctpop.i128", ctx->i128, - (LLVMValueRef []) { src0 }, 1, - AC_FUNC_ATTR_READNONE); - result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); - break; - case 64: - result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64, - (LLVMValueRef []) { src0 }, 1, - AC_FUNC_ATTR_READNONE); - - result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); - break; - case 32: - result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, - (LLVMValueRef []) { src0 }, 1, - AC_FUNC_ATTR_READNONE); - break; - case 16: - result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16, - (LLVMValueRef []) { src0 }, 1, - AC_FUNC_ATTR_READNONE); - - result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); - break; - case 8: - result = ac_build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8, - (LLVMValueRef []) { src0 }, 1, - AC_FUNC_ATTR_READNONE); - - result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); - break; - default: - unreachable(!"invalid bitsize"); - break; - } - - return result; -} - -LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - LLVMValueRef result; - unsigned bitsize; - - bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); - - switch (bitsize) { - case 64: - result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64, - (LLVMValueRef []) { src0 }, 1, - AC_FUNC_ATTR_READNONE); - - result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); - break; - case 32: - result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, - (LLVMValueRef []) { src0 }, 1, - AC_FUNC_ATTR_READNONE); - break; - case 16: - result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16, - (LLVMValueRef []) { src0 }, 1, - AC_FUNC_ATTR_READNONE); - - result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); - break; - case 8: - result 
= ac_build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8, - (LLVMValueRef []) { src0 }, 1, - AC_FUNC_ATTR_READNONE); - - result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); - break; - default: - unreachable(!"invalid bitsize"); - break; - } + switch (bitsize) { + case 128: + result = ac_build_intrinsic(ctx, "llvm.ctpop.i128", ctx->i128, (LLVMValueRef[]){src0}, 1, + AC_FUNC_ATTR_READNONE); + result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); + break; + case 64: + result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64, (LLVMValueRef[]){src0}, 1, + AC_FUNC_ATTR_READNONE); + + result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); + break; + case 32: + result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, (LLVMValueRef[]){src0}, 1, + AC_FUNC_ATTR_READNONE); + break; + case 16: + result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16, (LLVMValueRef[]){src0}, 1, + AC_FUNC_ATTR_READNONE); + + result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); + break; + case 8: + result = ac_build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8, (LLVMValueRef[]){src0}, 1, + AC_FUNC_ATTR_READNONE); + + result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); + break; + default: + unreachable(!"invalid bitsize"); + break; + } + + return result; +} + +LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0) +{ + LLVMValueRef result; + unsigned bitsize; + + bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); + + switch (bitsize) { + case 64: + result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64, (LLVMValueRef[]){src0}, 1, + AC_FUNC_ATTR_READNONE); + + result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); + break; + case 32: + result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, (LLVMValueRef[]){src0}, 1, + AC_FUNC_ATTR_READNONE); + break; + case 16: + result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16, (LLVMValueRef[]){src0}, 1, + AC_FUNC_ATTR_READNONE); + + result = 
LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); + break; + case 8: + result = ac_build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8, (LLVMValueRef[]){src0}, 1, + AC_FUNC_ATTR_READNONE); + + result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); + break; + default: + unreachable(!"invalid bitsize"); + break; + } - return result; + return result; } -#define AC_EXP_TARGET 0 +#define AC_EXP_TARGET 0 #define AC_EXP_ENABLED_CHANNELS 1 -#define AC_EXP_OUT0 2 +#define AC_EXP_OUT0 2 -enum ac_ir_type { - AC_IR_UNDEF, - AC_IR_CONST, - AC_IR_VALUE, +enum ac_ir_type +{ + AC_IR_UNDEF, + AC_IR_CONST, + AC_IR_VALUE, }; -struct ac_vs_exp_chan -{ - LLVMValueRef value; - float const_float; - enum ac_ir_type type; +struct ac_vs_exp_chan { + LLVMValueRef value; + float const_float; + enum ac_ir_type type; }; struct ac_vs_exp_inst { - unsigned offset; - LLVMValueRef inst; - struct ac_vs_exp_chan chan[4]; + unsigned offset; + LLVMValueRef inst; + struct ac_vs_exp_chan chan[4]; }; struct ac_vs_exports { - unsigned num; - struct ac_vs_exp_inst exp[VARYING_SLOT_MAX]; + unsigned num; + struct ac_vs_exp_inst exp[VARYING_SLOT_MAX]; }; /* Return true if the PARAM export has been eliminated. */ -static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset, - uint32_t num_outputs, - struct ac_vs_exp_inst *exp) -{ - unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ - bool is_zero[4] = {}, is_one[4] = {}; - - for (i = 0; i < 4; i++) { - /* It's a constant expression. Undef outputs are eliminated too. */ - if (exp->chan[i].type == AC_IR_UNDEF) { - is_zero[i] = true; - is_one[i] = true; - } else if (exp->chan[i].type == AC_IR_CONST) { - if (exp->chan[i].const_float == 0) - is_zero[i] = true; - else if (exp->chan[i].const_float == 1) - is_one[i] = true; - else - return false; /* other constant */ - } else - return false; - } - - /* Only certain combinations of 0 and 1 can be eliminated. */ - if (is_zero[0] && is_zero[1] && is_zero[2]) - default_val = is_zero[3] ? 
0 : 1; - else if (is_one[0] && is_one[1] && is_one[2]) - default_val = is_zero[3] ? 2 : 3; - else - return false; - - /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ - LLVMInstructionEraseFromParent(exp->inst); - - /* Change OFFSET to DEFAULT_VAL. */ - for (i = 0; i < num_outputs; i++) { - if (vs_output_param_offset[i] == exp->offset) { - vs_output_param_offset[i] = - AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val; - break; - } - } - return true; +static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset, uint32_t num_outputs, + struct ac_vs_exp_inst *exp) +{ + unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ + bool is_zero[4] = {}, is_one[4] = {}; + + for (i = 0; i < 4; i++) { + /* It's a constant expression. Undef outputs are eliminated too. */ + if (exp->chan[i].type == AC_IR_UNDEF) { + is_zero[i] = true; + is_one[i] = true; + } else if (exp->chan[i].type == AC_IR_CONST) { + if (exp->chan[i].const_float == 0) + is_zero[i] = true; + else if (exp->chan[i].const_float == 1) + is_one[i] = true; + else + return false; /* other constant */ + } else + return false; + } + + /* Only certain combinations of 0 and 1 can be eliminated. */ + if (is_zero[0] && is_zero[1] && is_zero[2]) + default_val = is_zero[3] ? 0 : 1; + else if (is_one[0] && is_one[1] && is_one[2]) + default_val = is_zero[3] ? 2 : 3; + else + return false; + + /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ + LLVMInstructionEraseFromParent(exp->inst); + + /* Change OFFSET to DEFAULT_VAL. 
*/ + for (i = 0; i < num_outputs; i++) { + if (vs_output_param_offset[i] == exp->offset) { + vs_output_param_offset[i] = AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val; + break; + } + } + return true; } static bool ac_eliminate_duplicated_output(struct ac_llvm_context *ctx, - uint8_t *vs_output_param_offset, - uint32_t num_outputs, - struct ac_vs_exports *processed, - struct ac_vs_exp_inst *exp) -{ - unsigned p, copy_back_channels = 0; - - /* See if the output is already in the list of processed outputs. - * The LLVMValueRef comparison relies on SSA. - */ - for (p = 0; p < processed->num; p++) { - bool different = false; - - for (unsigned j = 0; j < 4; j++) { - struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j]; - struct ac_vs_exp_chan *c2 = &exp->chan[j]; - - /* Treat undef as a match. */ - if (c2->type == AC_IR_UNDEF) - continue; - - /* If c1 is undef but c2 isn't, we can copy c2 to c1 - * and consider the instruction duplicated. - */ - if (c1->type == AC_IR_UNDEF) { - copy_back_channels |= 1 << j; - continue; - } - - /* Test whether the channels are not equal. */ - if (c1->type != c2->type || - (c1->type == AC_IR_CONST && - c1->const_float != c2->const_float) || - (c1->type == AC_IR_VALUE && - c1->value != c2->value)) { - different = true; - break; - } - } - if (!different) - break; - - copy_back_channels = 0; - } - if (p == processed->num) - return false; - - /* If a match was found, but the matching export has undef where the new - * one has a normal value, copy the normal value to the undef channel. - */ - struct ac_vs_exp_inst *match = &processed->exp[p]; - - /* Get current enabled channels mask. 
*/ - LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS); - unsigned enabled_channels = LLVMConstIntGetZExtValue(arg); - - while (copy_back_channels) { - unsigned chan = u_bit_scan(&copy_back_channels); - - assert(match->chan[chan].type == AC_IR_UNDEF); - LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan, - exp->chan[chan].value); - match->chan[chan] = exp->chan[chan]; - - /* Update number of enabled channels because the original mask - * is not always 0xf. - */ - enabled_channels |= (1 << chan); - LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS, - LLVMConstInt(ctx->i32, enabled_channels, 0)); - } - - /* The PARAM export is duplicated. Kill it. */ - LLVMInstructionEraseFromParent(exp->inst); - - /* Change OFFSET to the matching export. */ - for (unsigned i = 0; i < num_outputs; i++) { - if (vs_output_param_offset[i] == exp->offset) { - vs_output_param_offset[i] = match->offset; - break; - } - } - return true; -} - -void ac_optimize_vs_outputs(struct ac_llvm_context *ctx, - LLVMValueRef main_fn, - uint8_t *vs_output_param_offset, - uint32_t num_outputs, - uint32_t skip_output_mask, - uint8_t *num_param_exports) -{ - LLVMBasicBlockRef bb; - bool removed_any = false; - struct ac_vs_exports exports; - - exports.num = 0; - - /* Process all LLVM instructions. */ - bb = LLVMGetFirstBasicBlock(main_fn); - while (bb) { - LLVMValueRef inst = LLVMGetFirstInstruction(bb); - - while (inst) { - LLVMValueRef cur = inst; - inst = LLVMGetNextInstruction(inst); - struct ac_vs_exp_inst exp; - - if (LLVMGetInstructionOpcode(cur) != LLVMCall) - continue; - - LLVMValueRef callee = ac_llvm_get_called_value(cur); - - if (!ac_llvm_is_function(callee)) - continue; - - const char *name = LLVMGetValueName(callee); - unsigned num_args = LLVMCountParams(callee); - - /* Check if this is an export instruction. 
*/ - if ((num_args != 9 && num_args != 8) || - (strcmp(name, "llvm.SI.export") && - strcmp(name, "llvm.amdgcn.exp.f32"))) - continue; - - LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET); - unsigned target = LLVMConstIntGetZExtValue(arg); - - if (target < V_008DFC_SQ_EXP_PARAM) - continue; - - target -= V_008DFC_SQ_EXP_PARAM; - - /* Parse the instruction. */ - memset(&exp, 0, sizeof(exp)); - exp.offset = target; - exp.inst = cur; - - for (unsigned i = 0; i < 4; i++) { - LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i); - - exp.chan[i].value = v; - - if (LLVMIsUndef(v)) { - exp.chan[i].type = AC_IR_UNDEF; - } else if (LLVMIsAConstantFP(v)) { - LLVMBool loses_info; - exp.chan[i].type = AC_IR_CONST; - exp.chan[i].const_float = - LLVMConstRealGetDouble(v, &loses_info); - } else { - exp.chan[i].type = AC_IR_VALUE; - } - } - - /* Eliminate constant and duplicated PARAM exports. */ - if (!((1u << target) & skip_output_mask) && - (ac_eliminate_const_output(vs_output_param_offset, - num_outputs, &exp) || - ac_eliminate_duplicated_output(ctx, - vs_output_param_offset, - num_outputs, &exports, - &exp))) { - removed_any = true; - } else { - exports.exp[exports.num++] = exp; - } - } - bb = LLVMGetNextBasicBlock(bb); - } - - /* Remove holes in export memory due to removed PARAM exports. - * This is done by renumbering all PARAM exports. - */ - if (removed_any) { - uint8_t old_offset[VARYING_SLOT_MAX]; - unsigned out, i; - - /* Make a copy of the offsets. We need the old version while - * we are modifying some of them. */ - memcpy(old_offset, vs_output_param_offset, - sizeof(old_offset)); - - for (i = 0; i < exports.num; i++) { - unsigned offset = exports.exp[i].offset; - - /* Update vs_output_param_offset. Multiple outputs can - * have the same offset. - */ - for (out = 0; out < num_outputs; out++) { - if (old_offset[out] == offset) - vs_output_param_offset[out] = i; - } - - /* Change the PARAM offset in the instruction. 
*/ - LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET, - LLVMConstInt(ctx->i32, - V_008DFC_SQ_EXP_PARAM + i, 0)); - } - *num_param_exports = exports.num; - } + uint8_t *vs_output_param_offset, uint32_t num_outputs, + struct ac_vs_exports *processed, + struct ac_vs_exp_inst *exp) +{ + unsigned p, copy_back_channels = 0; + + /* See if the output is already in the list of processed outputs. + * The LLVMValueRef comparison relies on SSA. + */ + for (p = 0; p < processed->num; p++) { + bool different = false; + + for (unsigned j = 0; j < 4; j++) { + struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j]; + struct ac_vs_exp_chan *c2 = &exp->chan[j]; + + /* Treat undef as a match. */ + if (c2->type == AC_IR_UNDEF) + continue; + + /* If c1 is undef but c2 isn't, we can copy c2 to c1 + * and consider the instruction duplicated. + */ + if (c1->type == AC_IR_UNDEF) { + copy_back_channels |= 1 << j; + continue; + } + + /* Test whether the channels are not equal. */ + if (c1->type != c2->type || + (c1->type == AC_IR_CONST && c1->const_float != c2->const_float) || + (c1->type == AC_IR_VALUE && c1->value != c2->value)) { + different = true; + break; + } + } + if (!different) + break; + + copy_back_channels = 0; + } + if (p == processed->num) + return false; + + /* If a match was found, but the matching export has undef where the new + * one has a normal value, copy the normal value to the undef channel. + */ + struct ac_vs_exp_inst *match = &processed->exp[p]; + + /* Get current enabled channels mask. */ + LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS); + unsigned enabled_channels = LLVMConstIntGetZExtValue(arg); + + while (copy_back_channels) { + unsigned chan = u_bit_scan(&copy_back_channels); + + assert(match->chan[chan].type == AC_IR_UNDEF); + LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan, exp->chan[chan].value); + match->chan[chan] = exp->chan[chan]; + + /* Update number of enabled channels because the original mask + * is not always 0xf. 
+ */ + enabled_channels |= (1 << chan); + LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS, + LLVMConstInt(ctx->i32, enabled_channels, 0)); + } + + /* The PARAM export is duplicated. Kill it. */ + LLVMInstructionEraseFromParent(exp->inst); + + /* Change OFFSET to the matching export. */ + for (unsigned i = 0; i < num_outputs; i++) { + if (vs_output_param_offset[i] == exp->offset) { + vs_output_param_offset[i] = match->offset; + break; + } + } + return true; +} + +void ac_optimize_vs_outputs(struct ac_llvm_context *ctx, LLVMValueRef main_fn, + uint8_t *vs_output_param_offset, uint32_t num_outputs, + uint32_t skip_output_mask, uint8_t *num_param_exports) +{ + LLVMBasicBlockRef bb; + bool removed_any = false; + struct ac_vs_exports exports; + + exports.num = 0; + + /* Process all LLVM instructions. */ + bb = LLVMGetFirstBasicBlock(main_fn); + while (bb) { + LLVMValueRef inst = LLVMGetFirstInstruction(bb); + + while (inst) { + LLVMValueRef cur = inst; + inst = LLVMGetNextInstruction(inst); + struct ac_vs_exp_inst exp; + + if (LLVMGetInstructionOpcode(cur) != LLVMCall) + continue; + + LLVMValueRef callee = ac_llvm_get_called_value(cur); + + if (!ac_llvm_is_function(callee)) + continue; + + const char *name = LLVMGetValueName(callee); + unsigned num_args = LLVMCountParams(callee); + + /* Check if this is an export instruction. */ + if ((num_args != 9 && num_args != 8) || + (strcmp(name, "llvm.SI.export") && strcmp(name, "llvm.amdgcn.exp.f32"))) + continue; + + LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET); + unsigned target = LLVMConstIntGetZExtValue(arg); + + if (target < V_008DFC_SQ_EXP_PARAM) + continue; + + target -= V_008DFC_SQ_EXP_PARAM; + + /* Parse the instruction. 
*/ + memset(&exp, 0, sizeof(exp)); + exp.offset = target; + exp.inst = cur; + + for (unsigned i = 0; i < 4; i++) { + LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i); + + exp.chan[i].value = v; + + if (LLVMIsUndef(v)) { + exp.chan[i].type = AC_IR_UNDEF; + } else if (LLVMIsAConstantFP(v)) { + LLVMBool loses_info; + exp.chan[i].type = AC_IR_CONST; + exp.chan[i].const_float = LLVMConstRealGetDouble(v, &loses_info); + } else { + exp.chan[i].type = AC_IR_VALUE; + } + } + + /* Eliminate constant and duplicated PARAM exports. */ + if (!((1u << target) & skip_output_mask) && + (ac_eliminate_const_output(vs_output_param_offset, num_outputs, &exp) || + ac_eliminate_duplicated_output(ctx, vs_output_param_offset, num_outputs, &exports, + &exp))) { + removed_any = true; + } else { + exports.exp[exports.num++] = exp; + } + } + bb = LLVMGetNextBasicBlock(bb); + } + + /* Remove holes in export memory due to removed PARAM exports. + * This is done by renumbering all PARAM exports. + */ + if (removed_any) { + uint8_t old_offset[VARYING_SLOT_MAX]; + unsigned out, i; + + /* Make a copy of the offsets. We need the old version while + * we are modifying some of them. */ + memcpy(old_offset, vs_output_param_offset, sizeof(old_offset)); + + for (i = 0; i < exports.num; i++) { + unsigned offset = exports.exp[i].offset; + + /* Update vs_output_param_offset. Multiple outputs can + * have the same offset. + */ + for (out = 0; out < num_outputs; out++) { + if (old_offset[out] == offset) + vs_output_param_offset[out] = i; + } + + /* Change the PARAM offset in the instruction. 
*/ + LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET, + LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_PARAM + i, 0)); + } + *num_param_exports = exports.num; + } } void ac_init_exec_full_mask(struct ac_llvm_context *ctx) { - LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0); - ac_build_intrinsic(ctx, - "llvm.amdgcn.init.exec", ctx->voidt, - &full_mask, 1, AC_FUNC_ATTR_CONVERGENT); + LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0); + ac_build_intrinsic(ctx, "llvm.amdgcn.init.exec", ctx->voidt, &full_mask, 1, + AC_FUNC_ATTR_CONVERGENT); } void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx) { - unsigned lds_size = ctx->chip_class >= GFX7 ? 65536 : 32768; - ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0, - LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS), - "lds"); -} - -LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, - LLVMValueRef dw_addr) -{ - return LLVMBuildLoad(ctx->builder, ac_build_gep0(ctx, ctx->lds, dw_addr), ""); -} - -void ac_lds_store(struct ac_llvm_context *ctx, - LLVMValueRef dw_addr, - LLVMValueRef value) -{ - value = ac_to_integer(ctx, value); - ac_build_indexed_store(ctx, ctx->lds, - dw_addr, value); -} - -LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, - LLVMTypeRef dst_type, - LLVMValueRef src0) -{ - unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); - const char *intrin_name; - LLVMTypeRef type; - LLVMValueRef zero; - - switch (src0_bitsize) { - case 64: - intrin_name = "llvm.cttz.i64"; - type = ctx->i64; - zero = ctx->i64_0; - break; - case 32: - intrin_name = "llvm.cttz.i32"; - type = ctx->i32; - zero = ctx->i32_0; - break; - case 16: - intrin_name = "llvm.cttz.i16"; - type = ctx->i16; - zero = ctx->i16_0; - break; - case 8: - intrin_name = "llvm.cttz.i8"; - type = ctx->i8; - zero = ctx->i8_0; - break; - default: - unreachable(!"invalid bitsize"); - } - - LLVMValueRef params[2] = { - src0, - - /* The value of 1 means that ffs(x=0) = undef, so LLVM won't - * add special 
code to check for x=0. The reason is that - * the LLVM behavior for x=0 is different from what we - * need here. However, LLVM also assumes that ffs(x) is - * in [0, 31], but GLSL expects that ffs(0) = -1, so - * a conditional assignment to handle 0 is still required. - * - * The hardware already implements the correct behavior. - */ - ctx->i1true, - }; - - LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type, - params, 2, - AC_FUNC_ATTR_READNONE); - - if (src0_bitsize == 64) { - lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, ""); - } else if (src0_bitsize < 32) { - lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, ""); - } - - /* TODO: We need an intrinsic to skip this conditional. */ - /* Check for zero: */ - return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, - LLVMIntEQ, src0, - zero, ""), - LLVMConstInt(ctx->i32, -1, 0), lsb, ""); + unsigned lds_size = ctx->chip_class >= GFX7 ? 65536 : 32768; + ctx->lds = LLVMBuildIntToPtr( + ctx->builder, ctx->i32_0, + LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS), "lds"); +} + +LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, LLVMValueRef dw_addr) +{ + return LLVMBuildLoad(ctx->builder, ac_build_gep0(ctx, ctx->lds, dw_addr), ""); +} + +void ac_lds_store(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value) +{ + value = ac_to_integer(ctx, value); + ac_build_indexed_store(ctx, ctx->lds, dw_addr, value); +} + +LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0) +{ + unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); + const char *intrin_name; + LLVMTypeRef type; + LLVMValueRef zero; + + switch (src0_bitsize) { + case 64: + intrin_name = "llvm.cttz.i64"; + type = ctx->i64; + zero = ctx->i64_0; + break; + case 32: + intrin_name = "llvm.cttz.i32"; + type = ctx->i32; + zero = ctx->i32_0; + break; + case 16: + intrin_name = "llvm.cttz.i16"; + type = ctx->i16; + zero = ctx->i16_0; + break; + case 8: + 
intrin_name = "llvm.cttz.i8"; + type = ctx->i8; + zero = ctx->i8_0; + break; + default: + unreachable(!"invalid bitsize"); + } + + LLVMValueRef params[2] = { + src0, + + /* The value of 1 means that ffs(x=0) = undef, so LLVM won't + * add special code to check for x=0. The reason is that + * the LLVM behavior for x=0 is different from what we + * need here. However, LLVM also assumes that ffs(x) is + * in [0, 31], but GLSL expects that ffs(0) = -1, so + * a conditional assignment to handle 0 is still required. + * + * The hardware already implements the correct behavior. + */ + ctx->i1true, + }; + + LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type, params, 2, AC_FUNC_ATTR_READNONE); + + if (src0_bitsize == 64) { + lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, ""); + } else if (src0_bitsize < 32) { + lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, ""); + } + + /* TODO: We need an intrinsic to skip this conditional. */ + /* Check for zero: */ + return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0, zero, ""), + LLVMConstInt(ctx->i32, -1, 0), lsb, ""); } LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type) { - return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST); + return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST); } LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type) { - return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST_32BIT); + return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST_32BIT); } -static struct ac_llvm_flow * -get_current_flow(struct ac_llvm_context *ctx) +static struct ac_llvm_flow *get_current_flow(struct ac_llvm_context *ctx) { - if (ctx->flow->depth > 0) - return &ctx->flow->stack[ctx->flow->depth - 1]; - return NULL; + if (ctx->flow->depth > 0) + return &ctx->flow->stack[ctx->flow->depth - 1]; + return NULL; } -static struct ac_llvm_flow * -get_innermost_loop(struct ac_llvm_context *ctx) +static struct ac_llvm_flow *get_innermost_loop(struct ac_llvm_context *ctx) { - for 
(unsigned i = ctx->flow->depth; i > 0; --i) { - if (ctx->flow->stack[i - 1].loop_entry_block) - return &ctx->flow->stack[i - 1]; - } - return NULL; + for (unsigned i = ctx->flow->depth; i > 0; --i) { + if (ctx->flow->stack[i - 1].loop_entry_block) + return &ctx->flow->stack[i - 1]; + } + return NULL; } -static struct ac_llvm_flow * -push_flow(struct ac_llvm_context *ctx) +static struct ac_llvm_flow *push_flow(struct ac_llvm_context *ctx) { - struct ac_llvm_flow *flow; + struct ac_llvm_flow *flow; - if (ctx->flow->depth >= ctx->flow->depth_max) { - unsigned new_max = MAX2(ctx->flow->depth << 1, - AC_LLVM_INITIAL_CF_DEPTH); + if (ctx->flow->depth >= ctx->flow->depth_max) { + unsigned new_max = MAX2(ctx->flow->depth << 1, AC_LLVM_INITIAL_CF_DEPTH); - ctx->flow->stack = realloc(ctx->flow->stack, new_max * sizeof(*ctx->flow->stack)); - ctx->flow->depth_max = new_max; - } + ctx->flow->stack = realloc(ctx->flow->stack, new_max * sizeof(*ctx->flow->stack)); + ctx->flow->depth_max = new_max; + } - flow = &ctx->flow->stack[ctx->flow->depth]; - ctx->flow->depth++; + flow = &ctx->flow->stack[ctx->flow->depth]; + ctx->flow->depth++; - flow->next_block = NULL; - flow->loop_entry_block = NULL; - return flow; + flow->next_block = NULL; + flow->loop_entry_block = NULL; + return flow; } -static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, - int label_id) +static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int label_id) { - char buf[32]; - snprintf(buf, sizeof(buf), "%s%d", base, label_id); - LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf); + char buf[32]; + snprintf(buf, sizeof(buf), "%s%d", base, label_id); + LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf); } /* Append a basic block at the level of the parent flow. 
*/ -static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx, - const char *name) +static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx, const char *name) { - assert(ctx->flow->depth >= 1); + assert(ctx->flow->depth >= 1); - if (ctx->flow->depth >= 2) { - struct ac_llvm_flow *flow = &ctx->flow->stack[ctx->flow->depth - 2]; + if (ctx->flow->depth >= 2) { + struct ac_llvm_flow *flow = &ctx->flow->stack[ctx->flow->depth - 2]; - return LLVMInsertBasicBlockInContext(ctx->context, - flow->next_block, name); - } + return LLVMInsertBasicBlockInContext(ctx->context, flow->next_block, name); + } - LLVMValueRef main_fn = - LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)); - return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name); + LLVMValueRef main_fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)); + return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name); } /* Emit a branch to the given default target for the current block if * applicable -- that is, if the current block does not already contain a * branch from a break or continue. 
*/ -static void emit_default_branch(LLVMBuilderRef builder, - LLVMBasicBlockRef target) +static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target) { - if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder))) - LLVMBuildBr(builder, target); + if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder))) + LLVMBuildBr(builder, target); } void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id) { - struct ac_llvm_flow *flow = push_flow(ctx); - flow->loop_entry_block = append_basic_block(ctx, "LOOP"); - flow->next_block = append_basic_block(ctx, "ENDLOOP"); - set_basicblock_name(flow->loop_entry_block, "loop", label_id); - LLVMBuildBr(ctx->builder, flow->loop_entry_block); - LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block); + struct ac_llvm_flow *flow = push_flow(ctx); + flow->loop_entry_block = append_basic_block(ctx, "LOOP"); + flow->next_block = append_basic_block(ctx, "ENDLOOP"); + set_basicblock_name(flow->loop_entry_block, "loop", label_id); + LLVMBuildBr(ctx->builder, flow->loop_entry_block); + LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block); } void ac_build_break(struct ac_llvm_context *ctx) { - struct ac_llvm_flow *flow = get_innermost_loop(ctx); - LLVMBuildBr(ctx->builder, flow->next_block); + struct ac_llvm_flow *flow = get_innermost_loop(ctx); + LLVMBuildBr(ctx->builder, flow->next_block); } void ac_build_continue(struct ac_llvm_context *ctx) { - struct ac_llvm_flow *flow = get_innermost_loop(ctx); - LLVMBuildBr(ctx->builder, flow->loop_entry_block); + struct ac_llvm_flow *flow = get_innermost_loop(ctx); + LLVMBuildBr(ctx->builder, flow->loop_entry_block); } void ac_build_else(struct ac_llvm_context *ctx, int label_id) { - struct ac_llvm_flow *current_branch = get_current_flow(ctx); - LLVMBasicBlockRef endif_block; + struct ac_llvm_flow *current_branch = get_current_flow(ctx); + LLVMBasicBlockRef endif_block; - assert(!current_branch->loop_entry_block); + 
assert(!current_branch->loop_entry_block); - endif_block = append_basic_block(ctx, "ENDIF"); - emit_default_branch(ctx->builder, endif_block); + endif_block = append_basic_block(ctx, "ENDIF"); + emit_default_branch(ctx->builder, endif_block); - LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block); - set_basicblock_name(current_branch->next_block, "else", label_id); + LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block); + set_basicblock_name(current_branch->next_block, "else", label_id); - current_branch->next_block = endif_block; + current_branch->next_block = endif_block; } void ac_build_endif(struct ac_llvm_context *ctx, int label_id) { - struct ac_llvm_flow *current_branch = get_current_flow(ctx); + struct ac_llvm_flow *current_branch = get_current_flow(ctx); - assert(!current_branch->loop_entry_block); + assert(!current_branch->loop_entry_block); - emit_default_branch(ctx->builder, current_branch->next_block); - LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block); - set_basicblock_name(current_branch->next_block, "endif", label_id); + emit_default_branch(ctx->builder, current_branch->next_block); + LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block); + set_basicblock_name(current_branch->next_block, "endif", label_id); - ctx->flow->depth--; + ctx->flow->depth--; } void ac_build_endloop(struct ac_llvm_context *ctx, int label_id) { - struct ac_llvm_flow *current_loop = get_current_flow(ctx); + struct ac_llvm_flow *current_loop = get_current_flow(ctx); - assert(current_loop->loop_entry_block); + assert(current_loop->loop_entry_block); - emit_default_branch(ctx->builder, current_loop->loop_entry_block); + emit_default_branch(ctx->builder, current_loop->loop_entry_block); - LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block); - set_basicblock_name(current_loop->next_block, "endloop", label_id); - ctx->flow->depth--; + LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block); + 
set_basicblock_name(current_loop->next_block, "endloop", label_id); + ctx->flow->depth--; } void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id) { - struct ac_llvm_flow *flow = push_flow(ctx); - LLVMBasicBlockRef if_block; + struct ac_llvm_flow *flow = push_flow(ctx); + LLVMBasicBlockRef if_block; - if_block = append_basic_block(ctx, "IF"); - flow->next_block = append_basic_block(ctx, "ELSE"); - set_basicblock_name(if_block, "if", label_id); - LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block); - LLVMPositionBuilderAtEnd(ctx->builder, if_block); + if_block = append_basic_block(ctx, "IF"); + flow->next_block = append_basic_block(ctx, "ELSE"); + set_basicblock_name(if_block, "if", label_id); + LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block); + LLVMPositionBuilderAtEnd(ctx->builder, if_block); } -void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value, - int label_id) +void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value, int label_id) { - LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, - value, ctx->f32_0, ""); - ac_build_ifcc(ctx, cond, label_id); + LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, value, ctx->f32_0, ""); + ac_build_ifcc(ctx, cond, label_id); } -void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, - int label_id) +void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, int label_id) { - LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, - ac_to_integer(ctx, value), - ctx->i32_0, ""); - ac_build_ifcc(ctx, cond, label_id); + LLVMValueRef cond = + LLVMBuildICmp(ctx->builder, LLVMIntNE, ac_to_integer(ctx, value), ctx->i32_0, ""); + ac_build_ifcc(ctx, cond, label_id); } -LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, - const char *name) +LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name) { - LLVMBuilderRef builder = ac->builder; - 
LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); - LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); - LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); - LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context); - LLVMValueRef res; + LLVMBuilderRef builder = ac->builder; + LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); + LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); + LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context); + LLVMValueRef res; - if (first_instr) { - LLVMPositionBuilderBefore(first_builder, first_instr); - } else { - LLVMPositionBuilderAtEnd(first_builder, first_block); - } + if (first_instr) { + LLVMPositionBuilderBefore(first_builder, first_instr); + } else { + LLVMPositionBuilderAtEnd(first_builder, first_block); + } - res = LLVMBuildAlloca(first_builder, type, name); - LLVMDisposeBuilder(first_builder); - return res; + res = LLVMBuildAlloca(first_builder, type, name); + LLVMDisposeBuilder(first_builder); + return res; } -LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, - LLVMTypeRef type, const char *name) +LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name) { - LLVMValueRef ptr = ac_build_alloca_undef(ac, type, name); - LLVMBuildStore(ac->builder, LLVMConstNull(type), ptr); - return ptr; + LLVMValueRef ptr = ac_build_alloca_undef(ac, type, name); + LLVMBuildStore(ac->builder, LLVMConstNull(type), ptr); + return ptr; } -LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr, - LLVMTypeRef type) +LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr, LLVMTypeRef type) { - int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); - return 
LLVMBuildBitCast(ctx->builder, ptr, - LLVMPointerType(type, addr_space), ""); + int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); + return LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), ""); } -LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, - unsigned count) +LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count) { - unsigned num_components = ac_get_llvm_num_components(value); - if (count == num_components) - return value; + unsigned num_components = ac_get_llvm_num_components(value); + if (count == num_components) + return value; - LLVMValueRef masks[MAX2(count, 2)]; - masks[0] = ctx->i32_0; - masks[1] = ctx->i32_1; - for (unsigned i = 2; i < count; i++) - masks[i] = LLVMConstInt(ctx->i32, i, false); + LLVMValueRef masks[MAX2(count, 2)]; + masks[0] = ctx->i32_0; + masks[1] = ctx->i32_1; + for (unsigned i = 2; i < count; i++) + masks[i] = LLVMConstInt(ctx->i32, i, false); - if (count == 1) - return LLVMBuildExtractElement(ctx->builder, value, masks[0], - ""); + if (count == 1) + return LLVMBuildExtractElement(ctx->builder, value, masks[0], ""); - LLVMValueRef swizzle = LLVMConstVector(masks, count); - return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, ""); + LLVMValueRef swizzle = LLVMConstVector(masks, count); + return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, ""); } -LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, - unsigned rshift, unsigned bitwidth) +LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift, + unsigned bitwidth) { - LLVMValueRef value = param; - if (rshift) - value = LLVMBuildLShr(ctx->builder, value, - LLVMConstInt(ctx->i32, rshift, false), ""); + LLVMValueRef value = param; + if (rshift) + value = LLVMBuildLShr(ctx->builder, value, LLVMConstInt(ctx->i32, rshift, false), ""); - if (rshift + bitwidth < 32) { - unsigned mask = (1 << bitwidth) - 
1; - value = LLVMBuildAnd(ctx->builder, value, - LLVMConstInt(ctx->i32, mask, false), ""); - } - return value; + if (rshift + bitwidth < 32) { + unsigned mask = (1 << bitwidth) - 1; + value = LLVMBuildAnd(ctx->builder, value, LLVMConstInt(ctx->i32, mask, false), ""); + } + return value; } /* Adjust the sample index according to FMASK. @@ -3577,108 +3196,96 @@ * The sample index should be adjusted as follows: * addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF; */ -void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, - LLVMValueRef *addr, bool is_array_tex) +void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr, + bool is_array_tex) { - struct ac_image_args fmask_load = {}; - fmask_load.opcode = ac_image_load; - fmask_load.resource = fmask; - fmask_load.dmask = 0xf; - fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d; - fmask_load.attributes = AC_FUNC_ATTR_READNONE; - - fmask_load.coords[0] = addr[0]; - fmask_load.coords[1] = addr[1]; - if (is_array_tex) - fmask_load.coords[2] = addr[2]; - - LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load); - fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, - ac->i32_0, ""); - - /* Apply the formula. */ - unsigned sample_chan = is_array_tex ? 3 : 2; - LLVMValueRef final_sample; - final_sample = LLVMBuildMul(ac->builder, addr[sample_chan], - LLVMConstInt(ac->i32, 4, 0), ""); - final_sample = LLVMBuildLShr(ac->builder, fmask_value, final_sample, ""); - /* Mask the sample index by 0x7, because 0x8 means an unknown value - * with EQAA, so those will map to 0. */ - final_sample = LLVMBuildAnd(ac->builder, final_sample, - LLVMConstInt(ac->i32, 0x7, 0), ""); - - /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK - * resource descriptor is 0 (invalid). 
- */ - LLVMValueRef tmp; - tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, ""); - tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, ""); - tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, ""); - - /* Replace the MSAA sample index. */ - addr[sample_chan] = LLVMBuildSelect(ac->builder, tmp, final_sample, - addr[sample_chan], ""); -} - -static LLVMValueRef -_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, - LLVMValueRef lane, bool with_opt_barrier) -{ - LLVMTypeRef type = LLVMTypeOf(src); - LLVMValueRef result; - - if (with_opt_barrier) - ac_build_optimization_barrier(ctx, &src); - - src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); - if (lane) - lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, ""); - - result = ac_build_intrinsic(ctx, - lane == NULL ? "llvm.amdgcn.readfirstlane" : "llvm.amdgcn.readlane", - ctx->i32, (LLVMValueRef []) { src, lane }, - lane == NULL ? 1 : 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - - return LLVMBuildTrunc(ctx->builder, result, type, ""); -} - -static LLVMValueRef -ac_build_readlane_common(struct ac_llvm_context *ctx, - LLVMValueRef src, LLVMValueRef lane, - bool with_opt_barrier) -{ - LLVMTypeRef src_type = LLVMTypeOf(src); - src = ac_to_integer(ctx, src); - unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); - LLVMValueRef ret; - - if (bits > 32) { - assert(bits % 32 == 0); - LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); - LLVMValueRef src_vector = - LLVMBuildBitCast(ctx->builder, src, vec_type, ""); - ret = LLVMGetUndef(vec_type); - for (unsigned i = 0; i < bits / 32; i++) { - LLVMValueRef ret_comp; - - src = LLVMBuildExtractElement(ctx->builder, src_vector, - LLVMConstInt(ctx->i32, i, 0), ""); - - ret_comp = _ac_build_readlane(ctx, src, lane, - with_opt_barrier); - - ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp, - LLVMConstInt(ctx->i32, i, 0), ""); - } - } else { - ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier); - } - - if 
(LLVMGetTypeKind(src_type) == LLVMPointerTypeKind) - return LLVMBuildIntToPtr(ctx->builder, ret, src_type, ""); - return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); + struct ac_image_args fmask_load = {}; + fmask_load.opcode = ac_image_load; + fmask_load.resource = fmask; + fmask_load.dmask = 0xf; + fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d; + fmask_load.attributes = AC_FUNC_ATTR_READNONE; + + fmask_load.coords[0] = addr[0]; + fmask_load.coords[1] = addr[1]; + if (is_array_tex) + fmask_load.coords[2] = addr[2]; + + LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load); + fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, ""); + + /* Apply the formula. */ + unsigned sample_chan = is_array_tex ? 3 : 2; + LLVMValueRef final_sample; + final_sample = LLVMBuildMul(ac->builder, addr[sample_chan], LLVMConstInt(ac->i32, 4, 0), ""); + final_sample = LLVMBuildLShr(ac->builder, fmask_value, final_sample, ""); + /* Mask the sample index by 0x7, because 0x8 means an unknown value + * with EQAA, so those will map to 0. */ + final_sample = LLVMBuildAnd(ac->builder, final_sample, LLVMConstInt(ac->i32, 0x7, 0), ""); + + /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK + * resource descriptor is 0 (invalid). + */ + LLVMValueRef tmp; + tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, ""); + tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, ""); + tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, ""); + + /* Replace the MSAA sample index. 
*/ + addr[sample_chan] = LLVMBuildSelect(ac->builder, tmp, final_sample, addr[sample_chan], ""); +} + +static LLVMValueRef _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, + LLVMValueRef lane, bool with_opt_barrier) +{ + LLVMTypeRef type = LLVMTypeOf(src); + LLVMValueRef result; + + if (with_opt_barrier) + ac_build_optimization_barrier(ctx, &src); + + src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); + if (lane) + lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, ""); + + result = + ac_build_intrinsic(ctx, lane == NULL ? "llvm.amdgcn.readfirstlane" : "llvm.amdgcn.readlane", + ctx->i32, (LLVMValueRef[]){src, lane}, lane == NULL ? 1 : 2, + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); + + return LLVMBuildTrunc(ctx->builder, result, type, ""); +} + +static LLVMValueRef ac_build_readlane_common(struct ac_llvm_context *ctx, LLVMValueRef src, + LLVMValueRef lane, bool with_opt_barrier) +{ + LLVMTypeRef src_type = LLVMTypeOf(src); + src = ac_to_integer(ctx, src); + unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); + LLVMValueRef ret; + + if (bits > 32) { + assert(bits % 32 == 0); + LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); + LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, ""); + ret = LLVMGetUndef(vec_type); + for (unsigned i = 0; i < bits / 32; i++) { + LLVMValueRef ret_comp; + + src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), ""); + + ret_comp = _ac_build_readlane(ctx, src, lane, with_opt_barrier); + + ret = + LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), ""); + } + } else { + ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier); + } + + if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind) + return LLVMBuildIntToPtr(ctx->builder, ret, src_type, ""); + return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); } /** @@ -3692,429 +3299,433 @@ * @param lane - id of the lane or NULL for the first active lane * @return value 
of the lane */ -LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, - LLVMValueRef src, LLVMValueRef lane) +LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, LLVMValueRef src, + LLVMValueRef lane) +{ + return ac_build_readlane_common(ctx, src, lane, false); +} + +LLVMValueRef ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane) { - return ac_build_readlane_common(ctx, src, lane, false); + return ac_build_readlane_common(ctx, src, lane, true); } +LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, + LLVMValueRef lane) +{ + return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32, + (LLVMValueRef[]){value, lane, src}, 3, + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); +} + +LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask) +{ + if (ctx->wave_size == 32) { + return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32, + (LLVMValueRef[]){mask, ctx->i32_0}, 2, AC_FUNC_ATTR_READNONE); + } + LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, ctx->v2i32, ""); + LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec, ctx->i32_0, ""); + LLVMValueRef mask_hi = LLVMBuildExtractElement(ctx->builder, mask_vec, ctx->i32_1, ""); + LLVMValueRef val = + ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32, + (LLVMValueRef[]){mask_lo, ctx->i32_0}, 2, AC_FUNC_ATTR_READNONE); + val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32, (LLVMValueRef[]){mask_hi, val}, + 2, AC_FUNC_ATTR_READNONE); + return val; +} -LLVMValueRef -ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane) -{ - return ac_build_readlane_common(ctx, src, lane, true); -} - -LLVMValueRef -ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane) -{ - return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32, - (LLVMValueRef []) {value, 
lane, src}, 3, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); -} - -LLVMValueRef -ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask) -{ - if (ctx->wave_size == 32) { - return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32, - (LLVMValueRef []) { mask, ctx->i32_0 }, - 2, AC_FUNC_ATTR_READNONE); - } - LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, ctx->v2i32, ""); - LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec, - ctx->i32_0, ""); - LLVMValueRef mask_hi = LLVMBuildExtractElement(ctx->builder, mask_vec, - ctx->i32_1, ""); - LLVMValueRef val = - ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32, - (LLVMValueRef []) { mask_lo, ctx->i32_0 }, - 2, AC_FUNC_ATTR_READNONE); - val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32, - (LLVMValueRef []) { mask_hi, val }, - 2, AC_FUNC_ATTR_READNONE); - return val; -} - -enum dpp_ctrl { - _dpp_quad_perm = 0x000, - _dpp_row_sl = 0x100, - _dpp_row_sr = 0x110, - _dpp_row_rr = 0x120, - dpp_wf_sl1 = 0x130, - dpp_wf_rl1 = 0x134, - dpp_wf_sr1 = 0x138, - dpp_wf_rr1 = 0x13C, - dpp_row_mirror = 0x140, - dpp_row_half_mirror = 0x141, - dpp_row_bcast15 = 0x142, - dpp_row_bcast31 = 0x143 +enum dpp_ctrl +{ + _dpp_quad_perm = 0x000, + _dpp_row_sl = 0x100, + _dpp_row_sr = 0x110, + _dpp_row_rr = 0x120, + dpp_wf_sl1 = 0x130, + dpp_wf_rl1 = 0x134, + dpp_wf_sr1 = 0x138, + dpp_wf_rr1 = 0x13C, + dpp_row_mirror = 0x140, + dpp_row_half_mirror = 0x141, + dpp_row_bcast15 = 0x142, + dpp_row_bcast31 = 0x143 }; -static inline enum dpp_ctrl -dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3) +static inline enum dpp_ctrl dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, + unsigned lane3) { - assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4); - return _dpp_quad_perm | lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6); + assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4); + return _dpp_quad_perm | lane0 | (lane1 << 2) | (lane2 << 4) 
| (lane3 << 6); } -static inline enum dpp_ctrl -dpp_row_sl(unsigned amount) -{ - assert(amount > 0 && amount < 16); - return _dpp_row_sl | amount; -} - -static inline enum dpp_ctrl -dpp_row_sr(unsigned amount) -{ - assert(amount > 0 && amount < 16); - return _dpp_row_sr | amount; -} - -static LLVMValueRef -_ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src, - enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask, - bool bound_ctrl) -{ - LLVMTypeRef type = LLVMTypeOf(src); - LLVMValueRef res; - - old = LLVMBuildZExt(ctx->builder, old, ctx->i32, ""); - src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); - - res = ac_build_intrinsic(ctx, "llvm.amdgcn.update.dpp.i32", ctx->i32, - (LLVMValueRef[]) { - old, src, - LLVMConstInt(ctx->i32, dpp_ctrl, 0), - LLVMConstInt(ctx->i32, row_mask, 0), - LLVMConstInt(ctx->i32, bank_mask, 0), - LLVMConstInt(ctx->i1, bound_ctrl, 0) }, - 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); - - return LLVMBuildTrunc(ctx->builder, res, type, ""); -} - -static LLVMValueRef -ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src, - enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask, - bool bound_ctrl) -{ - LLVMTypeRef src_type = LLVMTypeOf(src); - src = ac_to_integer(ctx, src); - old = ac_to_integer(ctx, old); - unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); - LLVMValueRef ret; - if (bits > 32) { - assert(bits % 32 == 0); - LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); - LLVMValueRef src_vector = - LLVMBuildBitCast(ctx->builder, src, vec_type, ""); - LLVMValueRef old_vector = - LLVMBuildBitCast(ctx->builder, old, vec_type, ""); - ret = LLVMGetUndef(vec_type); - for (unsigned i = 0; i < bits / 32; i++) { - src = LLVMBuildExtractElement(ctx->builder, src_vector, - LLVMConstInt(ctx->i32, i, - 0), ""); - old = LLVMBuildExtractElement(ctx->builder, old_vector, - LLVMConstInt(ctx->i32, i, - 0), ""); - LLVMValueRef ret_comp = _ac_build_dpp(ctx, old, src, - 
dpp_ctrl, - row_mask, - bank_mask, - bound_ctrl); - ret = LLVMBuildInsertElement(ctx->builder, ret, - ret_comp, - LLVMConstInt(ctx->i32, i, - 0), ""); - } - } else { - ret = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, - bank_mask, bound_ctrl); - } - return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); -} - -static LLVMValueRef -_ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel, - bool exchange_rows, bool bound_ctrl) -{ - LLVMTypeRef type = LLVMTypeOf(src); - LLVMValueRef result; - - src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); - - LLVMValueRef args[6] = { - src, - src, - LLVMConstInt(ctx->i32, sel, false), - LLVMConstInt(ctx->i32, sel >> 32, false), - ctx->i1true, /* fi */ - bound_ctrl ? ctx->i1true : ctx->i1false, - }; - - result = ac_build_intrinsic(ctx, exchange_rows ? "llvm.amdgcn.permlanex16" - : "llvm.amdgcn.permlane16", - ctx->i32, args, 6, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); - - return LLVMBuildTrunc(ctx->builder, result, type, ""); -} - -static LLVMValueRef -ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel, - bool exchange_rows, bool bound_ctrl) -{ - LLVMTypeRef src_type = LLVMTypeOf(src); - src = ac_to_integer(ctx, src); - unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); - LLVMValueRef ret; - if (bits > 32) { - assert(bits % 32 == 0); - LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); - LLVMValueRef src_vector = - LLVMBuildBitCast(ctx->builder, src, vec_type, ""); - ret = LLVMGetUndef(vec_type); - for (unsigned i = 0; i < bits / 32; i++) { - src = LLVMBuildExtractElement(ctx->builder, src_vector, - LLVMConstInt(ctx->i32, i, - 0), ""); - LLVMValueRef ret_comp = - _ac_build_permlane16(ctx, src, sel, - exchange_rows, - bound_ctrl); - ret = LLVMBuildInsertElement(ctx->builder, ret, - ret_comp, - LLVMConstInt(ctx->i32, i, - 0), ""); - } - } else { - ret = _ac_build_permlane16(ctx, src, sel, exchange_rows, - bound_ctrl); - } - return 
LLVMBuildBitCast(ctx->builder, ret, src_type, ""); -} - -static inline unsigned -ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask) -{ - assert(and_mask < 32 && or_mask < 32 && xor_mask < 32); - return and_mask | (or_mask << 5) | (xor_mask << 10); -} - -static LLVMValueRef -_ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask) -{ - LLVMTypeRef src_type = LLVMTypeOf(src); - LLVMValueRef ret; - - src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); - - ret = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle", ctx->i32, - (LLVMValueRef []) { - src, LLVMConstInt(ctx->i32, mask, 0) }, - 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); - - return LLVMBuildTrunc(ctx->builder, ret, src_type, ""); -} - -LLVMValueRef -ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask) -{ - LLVMTypeRef src_type = LLVMTypeOf(src); - src = ac_to_integer(ctx, src); - unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); - LLVMValueRef ret; - if (bits > 32) { - assert(bits % 32 == 0); - LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); - LLVMValueRef src_vector = - LLVMBuildBitCast(ctx->builder, src, vec_type, ""); - ret = LLVMGetUndef(vec_type); - for (unsigned i = 0; i < bits / 32; i++) { - src = LLVMBuildExtractElement(ctx->builder, src_vector, - LLVMConstInt(ctx->i32, i, - 0), ""); - LLVMValueRef ret_comp = _ac_build_ds_swizzle(ctx, src, - mask); - ret = LLVMBuildInsertElement(ctx->builder, ret, - ret_comp, - LLVMConstInt(ctx->i32, i, - 0), ""); - } - } else { - ret = _ac_build_ds_swizzle(ctx, src, mask); - } - return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); -} - -static LLVMValueRef -ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src) -{ - LLVMTypeRef src_type = LLVMTypeOf(src); - unsigned bitsize = ac_get_elem_bits(ctx, src_type); - char name[32], type[8]; - LLVMValueRef ret; - - src = ac_to_integer(ctx, src); - - if (bitsize < 32) - src = LLVMBuildZExt(ctx->builder, src, 
ctx->i32, ""); - - ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type)); - snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type); - ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), - (LLVMValueRef []) { src }, 1, - AC_FUNC_ATTR_READNONE); - - if (bitsize < 32) - ret = LLVMBuildTrunc(ctx->builder, ret, - ac_to_integer_type(ctx, src_type), ""); - - return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); -} - -static LLVMValueRef -ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src, - LLVMValueRef inactive) -{ - char name[33], type[8]; - LLVMTypeRef src_type = LLVMTypeOf(src); - unsigned bitsize = ac_get_elem_bits(ctx, src_type); - src = ac_to_integer(ctx, src); - inactive = ac_to_integer(ctx, inactive); - - if (bitsize < 32) { - src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); - inactive = LLVMBuildZExt(ctx->builder, inactive, ctx->i32, ""); - } - - ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type)); - snprintf(name, sizeof(name), "llvm.amdgcn.set.inactive.%s", type); - LLVMValueRef ret = - ac_build_intrinsic(ctx, name, - LLVMTypeOf(src), (LLVMValueRef []) { - src, inactive }, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - if (bitsize < 32) - ret = LLVMBuildTrunc(ctx->builder, ret, src_type, ""); - - return ret; -} - -static LLVMValueRef -get_reduction_identity(struct ac_llvm_context *ctx, nir_op op, unsigned type_size) -{ - if (type_size == 1) { - switch (op) { - case nir_op_iadd: return ctx->i8_0; - case nir_op_imul: return ctx->i8_1; - case nir_op_imin: return LLVMConstInt(ctx->i8, INT8_MAX, 0); - case nir_op_umin: return LLVMConstInt(ctx->i8, UINT8_MAX, 0); - case nir_op_imax: return LLVMConstInt(ctx->i8, INT8_MIN, 0); - case nir_op_umax: return ctx->i8_0; - case nir_op_iand: return LLVMConstInt(ctx->i8, -1, 0); - case nir_op_ior: return ctx->i8_0; - case nir_op_ixor: return ctx->i8_0; - default: - unreachable("bad reduction intrinsic"); - } - } else if (type_size == 2) { - switch (op) { - case 
nir_op_iadd: return ctx->i16_0; - case nir_op_fadd: return ctx->f16_0; - case nir_op_imul: return ctx->i16_1; - case nir_op_fmul: return ctx->f16_1; - case nir_op_imin: return LLVMConstInt(ctx->i16, INT16_MAX, 0); - case nir_op_umin: return LLVMConstInt(ctx->i16, UINT16_MAX, 0); - case nir_op_fmin: return LLVMConstReal(ctx->f16, INFINITY); - case nir_op_imax: return LLVMConstInt(ctx->i16, INT16_MIN, 0); - case nir_op_umax: return ctx->i16_0; - case nir_op_fmax: return LLVMConstReal(ctx->f16, -INFINITY); - case nir_op_iand: return LLVMConstInt(ctx->i16, -1, 0); - case nir_op_ior: return ctx->i16_0; - case nir_op_ixor: return ctx->i16_0; - default: - unreachable("bad reduction intrinsic"); - } - } else if (type_size == 4) { - switch (op) { - case nir_op_iadd: return ctx->i32_0; - case nir_op_fadd: return ctx->f32_0; - case nir_op_imul: return ctx->i32_1; - case nir_op_fmul: return ctx->f32_1; - case nir_op_imin: return LLVMConstInt(ctx->i32, INT32_MAX, 0); - case nir_op_umin: return LLVMConstInt(ctx->i32, UINT32_MAX, 0); - case nir_op_fmin: return LLVMConstReal(ctx->f32, INFINITY); - case nir_op_imax: return LLVMConstInt(ctx->i32, INT32_MIN, 0); - case nir_op_umax: return ctx->i32_0; - case nir_op_fmax: return LLVMConstReal(ctx->f32, -INFINITY); - case nir_op_iand: return LLVMConstInt(ctx->i32, -1, 0); - case nir_op_ior: return ctx->i32_0; - case nir_op_ixor: return ctx->i32_0; - default: - unreachable("bad reduction intrinsic"); - } - } else { /* type_size == 64bit */ - switch (op) { - case nir_op_iadd: return ctx->i64_0; - case nir_op_fadd: return ctx->f64_0; - case nir_op_imul: return ctx->i64_1; - case nir_op_fmul: return ctx->f64_1; - case nir_op_imin: return LLVMConstInt(ctx->i64, INT64_MAX, 0); - case nir_op_umin: return LLVMConstInt(ctx->i64, UINT64_MAX, 0); - case nir_op_fmin: return LLVMConstReal(ctx->f64, INFINITY); - case nir_op_imax: return LLVMConstInt(ctx->i64, INT64_MIN, 0); - case nir_op_umax: return ctx->i64_0; - case nir_op_fmax: return 
LLVMConstReal(ctx->f64, -INFINITY); - case nir_op_iand: return LLVMConstInt(ctx->i64, -1, 0); - case nir_op_ior: return ctx->i64_0; - case nir_op_ixor: return ctx->i64_0; - default: - unreachable("bad reduction intrinsic"); - } - } -} - -static LLVMValueRef -ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, nir_op op) -{ - bool _64bit = ac_get_type_size(LLVMTypeOf(lhs)) == 8; - bool _32bit = ac_get_type_size(LLVMTypeOf(lhs)) == 4; - switch (op) { - case nir_op_iadd: return LLVMBuildAdd(ctx->builder, lhs, rhs, ""); - case nir_op_fadd: return LLVMBuildFAdd(ctx->builder, lhs, rhs, ""); - case nir_op_imul: return LLVMBuildMul(ctx->builder, lhs, rhs, ""); - case nir_op_fmul: return LLVMBuildFMul(ctx->builder, lhs, rhs, ""); - case nir_op_imin: return LLVMBuildSelect(ctx->builder, - LLVMBuildICmp(ctx->builder, LLVMIntSLT, lhs, rhs, ""), - lhs, rhs, ""); - case nir_op_umin: return LLVMBuildSelect(ctx->builder, - LLVMBuildICmp(ctx->builder, LLVMIntULT, lhs, rhs, ""), - lhs, rhs, ""); - case nir_op_fmin: return ac_build_intrinsic(ctx, - _64bit ? "llvm.minnum.f64" : _32bit ? "llvm.minnum.f32" : "llvm.minnum.f16", - _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16, - (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE); - case nir_op_imax: return LLVMBuildSelect(ctx->builder, - LLVMBuildICmp(ctx->builder, LLVMIntSGT, lhs, rhs, ""), - lhs, rhs, ""); - case nir_op_umax: return LLVMBuildSelect(ctx->builder, - LLVMBuildICmp(ctx->builder, LLVMIntUGT, lhs, rhs, ""), - lhs, rhs, ""); - case nir_op_fmax: return ac_build_intrinsic(ctx, - _64bit ? "llvm.maxnum.f64" : _32bit ? "llvm.maxnum.f32" : "llvm.maxnum.f16", - _64bit ? ctx->f64 : _32bit ? 
ctx->f32 : ctx->f16, - (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE); - case nir_op_iand: return LLVMBuildAnd(ctx->builder, lhs, rhs, ""); - case nir_op_ior: return LLVMBuildOr(ctx->builder, lhs, rhs, ""); - case nir_op_ixor: return LLVMBuildXor(ctx->builder, lhs, rhs, ""); - default: - unreachable("bad reduction intrinsic"); - } +static inline enum dpp_ctrl dpp_row_sl(unsigned amount) +{ + assert(amount > 0 && amount < 16); + return _dpp_row_sl | amount; +} + +static inline enum dpp_ctrl dpp_row_sr(unsigned amount) +{ + assert(amount > 0 && amount < 16); + return _dpp_row_sr | amount; +} + +static LLVMValueRef _ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src, + enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask, + bool bound_ctrl) +{ + LLVMTypeRef type = LLVMTypeOf(src); + LLVMValueRef res; + + old = LLVMBuildZExt(ctx->builder, old, ctx->i32, ""); + src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); + + res = ac_build_intrinsic( + ctx, "llvm.amdgcn.update.dpp.i32", ctx->i32, + (LLVMValueRef[]){old, src, LLVMConstInt(ctx->i32, dpp_ctrl, 0), + LLVMConstInt(ctx->i32, row_mask, 0), LLVMConstInt(ctx->i32, bank_mask, 0), + LLVMConstInt(ctx->i1, bound_ctrl, 0)}, + 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); + + return LLVMBuildTrunc(ctx->builder, res, type, ""); +} + +static LLVMValueRef ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src, + enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask, + bool bound_ctrl) +{ + LLVMTypeRef src_type = LLVMTypeOf(src); + src = ac_to_integer(ctx, src); + old = ac_to_integer(ctx, old); + unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); + LLVMValueRef ret; + if (bits > 32) { + assert(bits % 32 == 0); + LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); + LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, ""); + LLVMValueRef old_vector = LLVMBuildBitCast(ctx->builder, old, vec_type, ""); + ret = 
LLVMGetUndef(vec_type); + for (unsigned i = 0; i < bits / 32; i++) { + src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), ""); + old = LLVMBuildExtractElement(ctx->builder, old_vector, LLVMConstInt(ctx->i32, i, 0), ""); + LLVMValueRef ret_comp = + _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); + ret = + LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), ""); + } + } else { + ret = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); + } + return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); +} + +static LLVMValueRef _ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, + uint64_t sel, bool exchange_rows, bool bound_ctrl) +{ + LLVMTypeRef type = LLVMTypeOf(src); + LLVMValueRef result; + + src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); + + LLVMValueRef args[6] = { + src, + src, + LLVMConstInt(ctx->i32, sel, false), + LLVMConstInt(ctx->i32, sel >> 32, false), + ctx->i1true, /* fi */ + bound_ctrl ? ctx->i1true : ctx->i1false, + }; + + result = + ac_build_intrinsic(ctx, exchange_rows ? 
"llvm.amdgcn.permlanex16" : "llvm.amdgcn.permlane16", + ctx->i32, args, 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); + + return LLVMBuildTrunc(ctx->builder, result, type, ""); +} + +static LLVMValueRef ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel, + bool exchange_rows, bool bound_ctrl) +{ + LLVMTypeRef src_type = LLVMTypeOf(src); + src = ac_to_integer(ctx, src); + unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); + LLVMValueRef ret; + if (bits > 32) { + assert(bits % 32 == 0); + LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); + LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, ""); + ret = LLVMGetUndef(vec_type); + for (unsigned i = 0; i < bits / 32; i++) { + src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), ""); + LLVMValueRef ret_comp = _ac_build_permlane16(ctx, src, sel, exchange_rows, bound_ctrl); + ret = + LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), ""); + } + } else { + ret = _ac_build_permlane16(ctx, src, sel, exchange_rows, bound_ctrl); + } + return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); +} + +static inline unsigned ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask) +{ + assert(and_mask < 32 && or_mask < 32 && xor_mask < 32); + return and_mask | (or_mask << 5) | (xor_mask << 10); +} + +static LLVMValueRef _ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, + unsigned mask) +{ + LLVMTypeRef src_type = LLVMTypeOf(src); + LLVMValueRef ret; + + src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); + + ret = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle", ctx->i32, + (LLVMValueRef[]){src, LLVMConstInt(ctx->i32, mask, 0)}, 2, + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); + + return LLVMBuildTrunc(ctx->builder, ret, src_type, ""); +} + +LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask) +{ + LLVMTypeRef 
src_type = LLVMTypeOf(src); + src = ac_to_integer(ctx, src); + unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); + LLVMValueRef ret; + if (bits > 32) { + assert(bits % 32 == 0); + LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); + LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, ""); + ret = LLVMGetUndef(vec_type); + for (unsigned i = 0; i < bits / 32; i++) { + src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), ""); + LLVMValueRef ret_comp = _ac_build_ds_swizzle(ctx, src, mask); + ret = + LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), ""); + } + } else { + ret = _ac_build_ds_swizzle(ctx, src, mask); + } + return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); +} + +static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src) +{ + LLVMTypeRef src_type = LLVMTypeOf(src); + unsigned bitsize = ac_get_elem_bits(ctx, src_type); + char name[32], type[8]; + LLVMValueRef ret; + + src = ac_to_integer(ctx, src); + + if (bitsize < 32) + src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); + + ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type)); + snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type); + ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src}, 1, + AC_FUNC_ATTR_READNONE); + + if (bitsize < 32) + ret = LLVMBuildTrunc(ctx->builder, ret, ac_to_integer_type(ctx, src_type), ""); + + return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); +} + +static LLVMValueRef ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src, + LLVMValueRef inactive) +{ + char name[33], type[8]; + LLVMTypeRef src_type = LLVMTypeOf(src); + unsigned bitsize = ac_get_elem_bits(ctx, src_type); + src = ac_to_integer(ctx, src); + inactive = ac_to_integer(ctx, inactive); + + if (bitsize < 32) { + src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); + inactive = LLVMBuildZExt(ctx->builder, inactive, ctx->i32, ""); + } + 
+ ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type)); + snprintf(name, sizeof(name), "llvm.amdgcn.set.inactive.%s", type); + LLVMValueRef ret = + ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src, inactive}, 2, + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); + if (bitsize < 32) + ret = LLVMBuildTrunc(ctx->builder, ret, src_type, ""); + + return ret; +} + +static LLVMValueRef get_reduction_identity(struct ac_llvm_context *ctx, nir_op op, + unsigned type_size) +{ + if (type_size == 1) { + switch (op) { + case nir_op_iadd: + return ctx->i8_0; + case nir_op_imul: + return ctx->i8_1; + case nir_op_imin: + return LLVMConstInt(ctx->i8, INT8_MAX, 0); + case nir_op_umin: + return LLVMConstInt(ctx->i8, UINT8_MAX, 0); + case nir_op_imax: + return LLVMConstInt(ctx->i8, INT8_MIN, 0); + case nir_op_umax: + return ctx->i8_0; + case nir_op_iand: + return LLVMConstInt(ctx->i8, -1, 0); + case nir_op_ior: + return ctx->i8_0; + case nir_op_ixor: + return ctx->i8_0; + default: + unreachable("bad reduction intrinsic"); + } + } else if (type_size == 2) { + switch (op) { + case nir_op_iadd: + return ctx->i16_0; + case nir_op_fadd: + return ctx->f16_0; + case nir_op_imul: + return ctx->i16_1; + case nir_op_fmul: + return ctx->f16_1; + case nir_op_imin: + return LLVMConstInt(ctx->i16, INT16_MAX, 0); + case nir_op_umin: + return LLVMConstInt(ctx->i16, UINT16_MAX, 0); + case nir_op_fmin: + return LLVMConstReal(ctx->f16, INFINITY); + case nir_op_imax: + return LLVMConstInt(ctx->i16, INT16_MIN, 0); + case nir_op_umax: + return ctx->i16_0; + case nir_op_fmax: + return LLVMConstReal(ctx->f16, -INFINITY); + case nir_op_iand: + return LLVMConstInt(ctx->i16, -1, 0); + case nir_op_ior: + return ctx->i16_0; + case nir_op_ixor: + return ctx->i16_0; + default: + unreachable("bad reduction intrinsic"); + } + } else if (type_size == 4) { + switch (op) { + case nir_op_iadd: + return ctx->i32_0; + case nir_op_fadd: + return ctx->f32_0; + case nir_op_imul: + return 
ctx->i32_1; + case nir_op_fmul: + return ctx->f32_1; + case nir_op_imin: + return LLVMConstInt(ctx->i32, INT32_MAX, 0); + case nir_op_umin: + return LLVMConstInt(ctx->i32, UINT32_MAX, 0); + case nir_op_fmin: + return LLVMConstReal(ctx->f32, INFINITY); + case nir_op_imax: + return LLVMConstInt(ctx->i32, INT32_MIN, 0); + case nir_op_umax: + return ctx->i32_0; + case nir_op_fmax: + return LLVMConstReal(ctx->f32, -INFINITY); + case nir_op_iand: + return LLVMConstInt(ctx->i32, -1, 0); + case nir_op_ior: + return ctx->i32_0; + case nir_op_ixor: + return ctx->i32_0; + default: + unreachable("bad reduction intrinsic"); + } + } else { /* type_size == 64bit */ + switch (op) { + case nir_op_iadd: + return ctx->i64_0; + case nir_op_fadd: + return ctx->f64_0; + case nir_op_imul: + return ctx->i64_1; + case nir_op_fmul: + return ctx->f64_1; + case nir_op_imin: + return LLVMConstInt(ctx->i64, INT64_MAX, 0); + case nir_op_umin: + return LLVMConstInt(ctx->i64, UINT64_MAX, 0); + case nir_op_fmin: + return LLVMConstReal(ctx->f64, INFINITY); + case nir_op_imax: + return LLVMConstInt(ctx->i64, INT64_MIN, 0); + case nir_op_umax: + return ctx->i64_0; + case nir_op_fmax: + return LLVMConstReal(ctx->f64, -INFINITY); + case nir_op_iand: + return LLVMConstInt(ctx->i64, -1, 0); + case nir_op_ior: + return ctx->i64_0; + case nir_op_ixor: + return ctx->i64_0; + default: + unreachable("bad reduction intrinsic"); + } + } +} + +static LLVMValueRef ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, + nir_op op) +{ + bool _64bit = ac_get_type_size(LLVMTypeOf(lhs)) == 8; + bool _32bit = ac_get_type_size(LLVMTypeOf(lhs)) == 4; + switch (op) { + case nir_op_iadd: + return LLVMBuildAdd(ctx->builder, lhs, rhs, ""); + case nir_op_fadd: + return LLVMBuildFAdd(ctx->builder, lhs, rhs, ""); + case nir_op_imul: + return LLVMBuildMul(ctx->builder, lhs, rhs, ""); + case nir_op_fmul: + return LLVMBuildFMul(ctx->builder, lhs, rhs, ""); + case nir_op_imin: + return 
LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntSLT, lhs, rhs, ""), + lhs, rhs, ""); + case nir_op_umin: + return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntULT, lhs, rhs, ""), + lhs, rhs, ""); + case nir_op_fmin: + return ac_build_intrinsic( + ctx, _64bit ? "llvm.minnum.f64" : _32bit ? "llvm.minnum.f32" : "llvm.minnum.f16", + _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16, (LLVMValueRef[]){lhs, rhs}, 2, + AC_FUNC_ATTR_READNONE); + case nir_op_imax: + return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntSGT, lhs, rhs, ""), + lhs, rhs, ""); + case nir_op_umax: + return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntUGT, lhs, rhs, ""), + lhs, rhs, ""); + case nir_op_fmax: + return ac_build_intrinsic( + ctx, _64bit ? "llvm.maxnum.f64" : _32bit ? "llvm.maxnum.f32" : "llvm.maxnum.f16", + _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16, (LLVMValueRef[]){lhs, rhs}, 2, + AC_FUNC_ATTR_READNONE); + case nir_op_iand: + return LLVMBuildAnd(ctx->builder, lhs, rhs, ""); + case nir_op_ior: + return LLVMBuildOr(ctx->builder, lhs, rhs, ""); + case nir_op_ixor: + return LLVMBuildXor(ctx->builder, lhs, rhs, ""); + default: + unreachable("bad reduction intrinsic"); + } } /** @@ -4124,297 +3735,292 @@ * prefix of this many threads * \return src, shifted 1 lane up, and identity shifted into lane 0. 
*/ -static LLVMValueRef -ac_wavefront_shift_right_1(struct ac_llvm_context *ctx, LLVMValueRef src, - LLVMValueRef identity, unsigned maxprefix) -{ - if (ctx->chip_class >= GFX10) { - /* wavefront shift_right by 1 on GFX10 (emulate dpp_wf_sr1) */ - LLVMValueRef active, tmp1, tmp2; - LLVMValueRef tid = ac_get_thread_id(ctx); - - tmp1 = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false); - - tmp2 = ac_build_permlane16(ctx, src, (uint64_t)~0, true, false); - - if (maxprefix > 32) { - active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, - LLVMConstInt(ctx->i32, 32, false), ""); - - tmp2 = LLVMBuildSelect(ctx->builder, active, - ac_build_readlane(ctx, src, - LLVMConstInt(ctx->i32, 31, false)), - tmp2, ""); - - active = LLVMBuildOr(ctx->builder, active, - LLVMBuildICmp(ctx->builder, LLVMIntEQ, - LLVMBuildAnd(ctx->builder, tid, - LLVMConstInt(ctx->i32, 0x1f, false), ""), - LLVMConstInt(ctx->i32, 0x10, false), ""), ""); - return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); - } else if (maxprefix > 16) { - active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, - LLVMConstInt(ctx->i32, 16, false), ""); - - return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); - } - } else if (ctx->chip_class >= GFX8) { - return ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false); - } - - /* wavefront shift_right by 1 on SI/CI */ - LLVMValueRef active, tmp1, tmp2; - LLVMValueRef tid = ac_get_thread_id(ctx); - tmp1 = ac_build_ds_swizzle(ctx, src, (1 << 15) | dpp_quad_perm(0, 0, 1, 2)); - tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x18, 0x03, 0x00)); - active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, - LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x7, 0), ""), - LLVMConstInt(ctx->i32, 0x4, 0), ""); - tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); - tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x10, 0x07, 0x00)); - active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, - LLVMBuildAnd(ctx->builder, tid, 
LLVMConstInt(ctx->i32, 0xf, 0), ""), - LLVMConstInt(ctx->i32, 0x8, 0), ""); - tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); - tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x00, 0x0f, 0x00)); - active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, - LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, 0), ""), - LLVMConstInt(ctx->i32, 0x10, 0), ""); - tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); - tmp2 = ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, 0)); - active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, 0), ""); - tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); - active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 0, 0), ""); - return LLVMBuildSelect(ctx->builder, active, identity, tmp1, ""); +static LLVMValueRef ac_wavefront_shift_right_1(struct ac_llvm_context *ctx, LLVMValueRef src, + LLVMValueRef identity, unsigned maxprefix) +{ + if (ctx->chip_class >= GFX10) { + /* wavefront shift_right by 1 on GFX10 (emulate dpp_wf_sr1) */ + LLVMValueRef active, tmp1, tmp2; + LLVMValueRef tid = ac_get_thread_id(ctx); + + tmp1 = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false); + + tmp2 = ac_build_permlane16(ctx, src, (uint64_t)~0, true, false); + + if (maxprefix > 32) { + active = + LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, false), ""); + + tmp2 = LLVMBuildSelect(ctx->builder, active, + ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, false)), + tmp2, ""); + + active = LLVMBuildOr( + ctx->builder, active, + LLVMBuildICmp(ctx->builder, LLVMIntEQ, + LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, false), ""), + LLVMConstInt(ctx->i32, 0x10, false), ""), + ""); + return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); + } else if (maxprefix > 16) { + active = + LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 16, false), ""); + + return LLVMBuildSelect(ctx->builder, 
active, tmp2, tmp1, ""); + } + } else if (ctx->chip_class >= GFX8) { + return ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false); + } + + /* wavefront shift_right by 1 on SI/CI */ + LLVMValueRef active, tmp1, tmp2; + LLVMValueRef tid = ac_get_thread_id(ctx); + tmp1 = ac_build_ds_swizzle(ctx, src, (1 << 15) | dpp_quad_perm(0, 0, 1, 2)); + tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x18, 0x03, 0x00)); + active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, + LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x7, 0), ""), + LLVMConstInt(ctx->i32, 0x4, 0), ""); + tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); + tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x10, 0x07, 0x00)); + active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, + LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0xf, 0), ""), + LLVMConstInt(ctx->i32, 0x8, 0), ""); + tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); + tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x00, 0x0f, 0x00)); + active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, + LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, 0), ""), + LLVMConstInt(ctx->i32, 0x10, 0), ""); + tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); + tmp2 = ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, 0)); + active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, 0), ""); + tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); + active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 0, 0), ""); + return LLVMBuildSelect(ctx->builder, active, identity, tmp1, ""); } /** * \param maxprefix specifies that the result only needs to be correct for a * prefix of this many threads */ -static LLVMValueRef -ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity, - unsigned maxprefix, bool inclusive) -{ - LLVMValueRef result, tmp; - - if (!inclusive) - src = ac_wavefront_shift_right_1(ctx, 
src, identity, maxprefix); - - result = src; - - if (ctx->chip_class <= GFX7) { - assert(maxprefix == 64); - LLVMValueRef tid = ac_get_thread_id(ctx); - LLVMValueRef active; - tmp = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x1e, 0x00, 0x00)); - active = LLVMBuildICmp(ctx->builder, LLVMIntNE, - LLVMBuildAnd(ctx->builder, tid, ctx->i32_1, ""), - ctx->i32_0, ""); - tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); - result = ac_build_alu_op(ctx, result, tmp, op); - tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1c, 0x01, 0x00)); - active = LLVMBuildICmp(ctx->builder, LLVMIntNE, - LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 2, 0), ""), - ctx->i32_0, ""); - tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); - result = ac_build_alu_op(ctx, result, tmp, op); - tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x18, 0x03, 0x00)); - active = LLVMBuildICmp(ctx->builder, LLVMIntNE, - LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 4, 0), ""), - ctx->i32_0, ""); - tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); - result = ac_build_alu_op(ctx, result, tmp, op); - tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x10, 0x07, 0x00)); - active = LLVMBuildICmp(ctx->builder, LLVMIntNE, - LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 8, 0), ""), - ctx->i32_0, ""); - tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); - result = ac_build_alu_op(ctx, result, tmp, op); - tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x00, 0x0f, 0x00)); - active = LLVMBuildICmp(ctx->builder, LLVMIntNE, - LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, 0), ""), - ctx->i32_0, ""); - tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); - result = ac_build_alu_op(ctx, result, tmp, op); - tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, 0)); - active = LLVMBuildICmp(ctx->builder, LLVMIntNE, - LLVMBuildAnd(ctx->builder, tid, 
LLVMConstInt(ctx->i32, 32, 0), ""), - ctx->i32_0, ""); - tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); - result = ac_build_alu_op(ctx, result, tmp, op); - return result; - } - - if (maxprefix <= 1) - return result; - tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false); - result = ac_build_alu_op(ctx, result, tmp, op); - if (maxprefix <= 2) - return result; - tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false); - result = ac_build_alu_op(ctx, result, tmp, op); - if (maxprefix <= 3) - return result; - tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false); - result = ac_build_alu_op(ctx, result, tmp, op); - if (maxprefix <= 4) - return result; - tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false); - result = ac_build_alu_op(ctx, result, tmp, op); - if (maxprefix <= 8) - return result; - tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false); - result = ac_build_alu_op(ctx, result, tmp, op); - if (maxprefix <= 16) - return result; - - if (ctx->chip_class >= GFX10) { - LLVMValueRef tid = ac_get_thread_id(ctx); - LLVMValueRef active; - - tmp = ac_build_permlane16(ctx, result, ~(uint64_t)0, true, false); - - active = LLVMBuildICmp(ctx->builder, LLVMIntNE, - LLVMBuildAnd(ctx->builder, tid, - LLVMConstInt(ctx->i32, 16, false), ""), - ctx->i32_0, ""); - - tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); - - result = ac_build_alu_op(ctx, result, tmp, op); - - if (maxprefix <= 32) - return result; - - tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false)); - - active = LLVMBuildICmp(ctx->builder, LLVMIntUGE, tid, - LLVMConstInt(ctx->i32, 32, false), ""); - - tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); - - result = ac_build_alu_op(ctx, result, tmp, op); - return result; - } - - tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); - result = ac_build_alu_op(ctx, result, tmp, op); - if 
(maxprefix <= 32) - return result; - tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); - result = ac_build_alu_op(ctx, result, tmp, op); - return result; -} - -LLVMValueRef -ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) -{ - LLVMValueRef result; - - if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { - LLVMBuilderRef builder = ctx->builder; - src = LLVMBuildZExt(builder, src, ctx->i32, ""); - result = ac_build_ballot(ctx, src); - result = ac_build_mbcnt(ctx, result); - result = LLVMBuildAdd(builder, result, src, ""); - return result; - } - - ac_build_optimization_barrier(ctx, &src); - - LLVMValueRef identity = - get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); - result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), - LLVMTypeOf(identity), ""); - result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, true); - - return ac_build_wwm(ctx, result); -} - -LLVMValueRef -ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) -{ - LLVMValueRef result; - - if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { - LLVMBuilderRef builder = ctx->builder; - src = LLVMBuildZExt(builder, src, ctx->i32, ""); - result = ac_build_ballot(ctx, src); - result = ac_build_mbcnt(ctx, result); - return result; - } - - ac_build_optimization_barrier(ctx, &src); - - LLVMValueRef identity = - get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); - result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), - LLVMTypeOf(identity), ""); - result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, false); - - return ac_build_wwm(ctx, result); -} - -LLVMValueRef -ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size) -{ - if (cluster_size == 1) return src; - ac_build_optimization_barrier(ctx, &src); - LLVMValueRef result, swap; - LLVMValueRef identity = 
get_reduction_identity(ctx, op, - ac_get_type_size(LLVMTypeOf(src))); - result = LLVMBuildBitCast(ctx->builder, - ac_build_set_inactive(ctx, src, identity), - LLVMTypeOf(identity), ""); - swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2); - result = ac_build_alu_op(ctx, result, swap, op); - if (cluster_size == 2) return ac_build_wwm(ctx, result); - - swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1); - result = ac_build_alu_op(ctx, result, swap, op); - if (cluster_size == 4) return ac_build_wwm(ctx, result); - - if (ctx->chip_class >= GFX8) - swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false); - else - swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04)); - result = ac_build_alu_op(ctx, result, swap, op); - if (cluster_size == 8) return ac_build_wwm(ctx, result); - - if (ctx->chip_class >= GFX8) - swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false); - else - swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08)); - result = ac_build_alu_op(ctx, result, swap, op); - if (cluster_size == 16) return ac_build_wwm(ctx, result); - - if (ctx->chip_class >= GFX10) - swap = ac_build_permlane16(ctx, result, 0, true, false); - else if (ctx->chip_class >= GFX8 && cluster_size != 32) - swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); - else - swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10)); - result = ac_build_alu_op(ctx, result, swap, op); - if (cluster_size == 32) return ac_build_wwm(ctx, result); - - if (ctx->chip_class >= GFX8) { - if (ctx->wave_size == 64) { - if (ctx->chip_class >= GFX10) - swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false)); - else - swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); - result = ac_build_alu_op(ctx, result, swap, op); - result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0)); - } - - return ac_build_wwm(ctx, result); - } 
else { - swap = ac_build_readlane(ctx, result, ctx->i32_0); - result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0)); - result = ac_build_alu_op(ctx, result, swap, op); - return ac_build_wwm(ctx, result); - } +static LLVMValueRef ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, + LLVMValueRef identity, unsigned maxprefix, bool inclusive) +{ + LLVMValueRef result, tmp; + + if (!inclusive) + src = ac_wavefront_shift_right_1(ctx, src, identity, maxprefix); + + result = src; + + if (ctx->chip_class <= GFX7) { + assert(maxprefix == 64); + LLVMValueRef tid = ac_get_thread_id(ctx); + LLVMValueRef active; + tmp = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x1e, 0x00, 0x00)); + active = LLVMBuildICmp(ctx->builder, LLVMIntNE, + LLVMBuildAnd(ctx->builder, tid, ctx->i32_1, ""), ctx->i32_0, ""); + tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); + result = ac_build_alu_op(ctx, result, tmp, op); + tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1c, 0x01, 0x00)); + active = LLVMBuildICmp(ctx->builder, LLVMIntNE, + LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 2, 0), ""), + ctx->i32_0, ""); + tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); + result = ac_build_alu_op(ctx, result, tmp, op); + tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x18, 0x03, 0x00)); + active = LLVMBuildICmp(ctx->builder, LLVMIntNE, + LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 4, 0), ""), + ctx->i32_0, ""); + tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); + result = ac_build_alu_op(ctx, result, tmp, op); + tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x10, 0x07, 0x00)); + active = LLVMBuildICmp(ctx->builder, LLVMIntNE, + LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 8, 0), ""), + ctx->i32_0, ""); + tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); + result = ac_build_alu_op(ctx, result, tmp, op); + tmp = ac_build_ds_swizzle(ctx, result, 
ds_pattern_bitmode(0x00, 0x0f, 0x00)); + active = LLVMBuildICmp(ctx->builder, LLVMIntNE, + LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, 0), ""), + ctx->i32_0, ""); + tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); + result = ac_build_alu_op(ctx, result, tmp, op); + tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, 0)); + active = LLVMBuildICmp(ctx->builder, LLVMIntNE, + LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 32, 0), ""), + ctx->i32_0, ""); + tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); + result = ac_build_alu_op(ctx, result, tmp, op); + return result; + } + + if (maxprefix <= 1) + return result; + tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false); + result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 2) + return result; + tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false); + result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 3) + return result; + tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false); + result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 4) + return result; + tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false); + result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 8) + return result; + tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false); + result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 16) + return result; + + if (ctx->chip_class >= GFX10) { + LLVMValueRef tid = ac_get_thread_id(ctx); + LLVMValueRef active; + + tmp = ac_build_permlane16(ctx, result, ~(uint64_t)0, true, false); + + active = LLVMBuildICmp(ctx->builder, LLVMIntNE, + LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, false), ""), + ctx->i32_0, ""); + + tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); + + result = ac_build_alu_op(ctx, result, tmp, op); + + if (maxprefix <= 32) + return result; + + tmp = 
ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false)); + + active = LLVMBuildICmp(ctx->builder, LLVMIntUGE, tid, LLVMConstInt(ctx->i32, 32, false), ""); + + tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); + + result = ac_build_alu_op(ctx, result, tmp, op); + return result; + } + + tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); + result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 32) + return result; + tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); + result = ac_build_alu_op(ctx, result, tmp, op); + return result; +} + +LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) +{ + LLVMValueRef result; + + if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { + LLVMBuilderRef builder = ctx->builder; + src = LLVMBuildZExt(builder, src, ctx->i32, ""); + result = ac_build_ballot(ctx, src); + result = ac_build_mbcnt(ctx, result); + result = LLVMBuildAdd(builder, result, src, ""); + return result; + } + + ac_build_optimization_barrier(ctx, &src); + + LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); + result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), + LLVMTypeOf(identity), ""); + result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, true); + + return ac_build_wwm(ctx, result); +} + +LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) +{ + LLVMValueRef result; + + if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { + LLVMBuilderRef builder = ctx->builder; + src = LLVMBuildZExt(builder, src, ctx->i32, ""); + result = ac_build_ballot(ctx, src); + result = ac_build_mbcnt(ctx, result); + return result; + } + + ac_build_optimization_barrier(ctx, &src); + + LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); + result = LLVMBuildBitCast(ctx->builder, 
ac_build_set_inactive(ctx, src, identity), + LLVMTypeOf(identity), ""); + result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, false); + + return ac_build_wwm(ctx, result); +} + +LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, + unsigned cluster_size) +{ + if (cluster_size == 1) + return src; + ac_build_optimization_barrier(ctx, &src); + LLVMValueRef result, swap; + LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); + result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), + LLVMTypeOf(identity), ""); + swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2); + result = ac_build_alu_op(ctx, result, swap, op); + if (cluster_size == 2) + return ac_build_wwm(ctx, result); + + swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1); + result = ac_build_alu_op(ctx, result, swap, op); + if (cluster_size == 4) + return ac_build_wwm(ctx, result); + + if (ctx->chip_class >= GFX8) + swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false); + else + swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04)); + result = ac_build_alu_op(ctx, result, swap, op); + if (cluster_size == 8) + return ac_build_wwm(ctx, result); + + if (ctx->chip_class >= GFX8) + swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false); + else + swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08)); + result = ac_build_alu_op(ctx, result, swap, op); + if (cluster_size == 16) + return ac_build_wwm(ctx, result); + + if (ctx->chip_class >= GFX10) + swap = ac_build_permlane16(ctx, result, 0, true, false); + else if (ctx->chip_class >= GFX8 && cluster_size != 32) + swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); + else + swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10)); + result = ac_build_alu_op(ctx, result, swap, op); + if (cluster_size == 32) + return 
ac_build_wwm(ctx, result); + + if (ctx->chip_class >= GFX8) { + if (ctx->wave_size == 64) { + if (ctx->chip_class >= GFX10) + swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false)); + else + swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); + result = ac_build_alu_op(ctx, result, swap, op); + result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0)); + } + + return ac_build_wwm(ctx, result); + } else { + swap = ac_build_readlane(ctx, result, ctx->i32_0); + result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0)); + result = ac_build_alu_op(ctx, result, swap, op); + return ac_build_wwm(ctx, result); + } } /** @@ -4424,21 +4030,20 @@ * The source value must be present in the highest lane of the wave, and the * highest lane must be live. */ -void -ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +void ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) { - if (ws->maxwaves <= 1) - return; + if (ws->maxwaves <= 1) + return; - const LLVMValueRef last_lane = LLVMConstInt(ctx->i32, ctx->wave_size - 1, false); - LLVMBuilderRef builder = ctx->builder; - LLVMValueRef tid = ac_get_thread_id(ctx); - LLVMValueRef tmp; - - tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, last_lane, ""); - ac_build_ifcc(ctx, tmp, 1000); - LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, "")); - ac_build_endif(ctx, 1000); + const LLVMValueRef last_lane = LLVMConstInt(ctx->i32, ctx->wave_size - 1, false); + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef tid = ac_get_thread_id(ctx); + LLVMValueRef tmp; + + tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, last_lane, ""); + ac_build_ifcc(ctx, tmp, 1000); + LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, "")); + ac_build_endif(ctx, 1000); } /** @@ -4447,61 +4052,59 @@ * * The caller must place a barrier between the top and bottom halves. 
*/ -void -ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +void ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) { - const LLVMTypeRef type = LLVMTypeOf(ws->src); - const LLVMValueRef identity = - get_reduction_identity(ctx, ws->op, ac_get_type_size(type)); - - if (ws->maxwaves <= 1) { - ws->result_reduce = ws->src; - ws->result_inclusive = ws->src; - ws->result_exclusive = identity; - return; - } - assert(ws->maxwaves <= 32); - - LLVMBuilderRef builder = ctx->builder; - LLVMValueRef tid = ac_get_thread_id(ctx); - LLVMBasicBlockRef bbs[2]; - LLVMValueRef phivalues_scan[2]; - LLVMValueRef tmp, tmp2; - - bbs[0] = LLVMGetInsertBlock(builder); - phivalues_scan[0] = LLVMGetUndef(type); - - if (ws->enable_reduce) - tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, ""); - else if (ws->enable_inclusive) - tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, ""); - else - tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, ""); - ac_build_ifcc(ctx, tmp, 1001); - { - tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), ""); - - ac_build_optimization_barrier(ctx, &tmp); - - bbs[1] = LLVMGetInsertBlock(builder); - phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true); - } - ac_build_endif(ctx, 1001); - - const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs); - - if (ws->enable_reduce) { - tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, ""); - ws->result_reduce = ac_build_readlane(ctx, scan, tmp); - } - if (ws->enable_inclusive) - ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx); - if (ws->enable_exclusive) { - tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, ""); - tmp = ac_build_readlane(ctx, scan, tmp); - tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, ""); - ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, ""); - } + const LLVMTypeRef type = 
LLVMTypeOf(ws->src); + const LLVMValueRef identity = get_reduction_identity(ctx, ws->op, ac_get_type_size(type)); + + if (ws->maxwaves <= 1) { + ws->result_reduce = ws->src; + ws->result_inclusive = ws->src; + ws->result_exclusive = identity; + return; + } + assert(ws->maxwaves <= 32); + + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef tid = ac_get_thread_id(ctx); + LLVMBasicBlockRef bbs[2]; + LLVMValueRef phivalues_scan[2]; + LLVMValueRef tmp, tmp2; + + bbs[0] = LLVMGetInsertBlock(builder); + phivalues_scan[0] = LLVMGetUndef(type); + + if (ws->enable_reduce) + tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, ""); + else if (ws->enable_inclusive) + tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, ""); + else + tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, ""); + ac_build_ifcc(ctx, tmp, 1001); + { + tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), ""); + + ac_build_optimization_barrier(ctx, &tmp); + + bbs[1] = LLVMGetInsertBlock(builder); + phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true); + } + ac_build_endif(ctx, 1001); + + const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs); + + if (ws->enable_reduce) { + tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, ""); + ws->result_reduce = ac_build_readlane(ctx, scan, tmp); + } + if (ws->enable_inclusive) + ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx); + if (ws->enable_exclusive) { + tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, ""); + tmp = ac_build_readlane(ctx, scan, tmp); + tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, ""); + ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, ""); + } } /** @@ -4513,12 +4116,11 @@ * of the workgroup are live. (This requirement cannot easily be relaxed in a * useful manner because of the barrier in the algorithm.) 
*/ -void -ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +void ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) { - ac_build_wg_wavescan_top(ctx, ws); - ac_build_s_barrier(ctx); - ac_build_wg_wavescan_bottom(ctx, ws); + ac_build_wg_wavescan_top(ctx, ws); + ac_build_s_barrier(ctx); + ac_build_wg_wavescan_bottom(ctx, ws); } /** @@ -4527,25 +4129,24 @@ * * All lanes must be active when this code runs. */ -void -ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +void ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) { - if (ws->enable_exclusive) { - ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op); - if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd) - ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, ""); - ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op); - } else { - ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op); - } - - bool enable_inclusive = ws->enable_inclusive; - bool enable_exclusive = ws->enable_exclusive; - ws->enable_inclusive = false; - ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; - ac_build_wg_wavescan_top(ctx, ws); - ws->enable_inclusive = enable_inclusive; - ws->enable_exclusive = enable_exclusive; + if (ws->enable_exclusive) { + ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op); + if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd) + ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, ""); + ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op); + } else { + ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op); + } + + bool enable_inclusive = ws->enable_inclusive; + bool enable_exclusive = ws->enable_exclusive; + ws->enable_inclusive = false; + ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; + ac_build_wg_wavescan_top(ctx, ws); + ws->enable_inclusive = enable_inclusive; + ws->enable_exclusive = enable_exclusive; } /** @@ -4554,22 +4155,21 @@ * * 
The caller must place a barrier between the top and bottom halves. */ -void -ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +void ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) { - bool enable_inclusive = ws->enable_inclusive; - bool enable_exclusive = ws->enable_exclusive; - ws->enable_inclusive = false; - ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; - ac_build_wg_wavescan_bottom(ctx, ws); - ws->enable_inclusive = enable_inclusive; - ws->enable_exclusive = enable_exclusive; - - /* ws->result_reduce is already the correct value */ - if (ws->enable_inclusive) - ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive, ws->src, ws->op); - if (ws->enable_exclusive) - ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op); + bool enable_inclusive = ws->enable_inclusive; + bool enable_exclusive = ws->enable_exclusive; + ws->enable_inclusive = false; + ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; + ac_build_wg_wavescan_bottom(ctx, ws); + ws->enable_inclusive = enable_inclusive; + ws->enable_exclusive = enable_exclusive; + + /* ws->result_reduce is already the correct value */ + if (ws->enable_inclusive) + ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive, ws->src, ws->op); + if (ws->enable_exclusive) + ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op); } /** @@ -4578,114 +4178,101 @@ * The caller must ensure that all lanes are active when this code runs * (WWM is insufficient!), because there is an implied barrier. 
*/ -void -ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +void ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) { - ac_build_wg_scan_top(ctx, ws); - ac_build_s_barrier(ctx); - ac_build_wg_scan_bottom(ctx, ws); -} - -LLVMValueRef -ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, - unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3) -{ - unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3); - if (ctx->chip_class >= GFX8) { - return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false); - } else { - return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask); - } -} - -LLVMValueRef -ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index) -{ - LLVMTypeRef type = LLVMTypeOf(src); - LLVMValueRef result; - - index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); - src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); - - result = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, - (LLVMValueRef []) {index, src}, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - return LLVMBuildTrunc(ctx->builder, result, type, ""); -} - -LLVMValueRef -ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize) -{ - LLVMTypeRef type; - char *intr; - - if (bitsize == 16) { - intr = "llvm.amdgcn.frexp.exp.i16.f16"; - type = ctx->i16; - } else if (bitsize == 32) { - intr = "llvm.amdgcn.frexp.exp.i32.f32"; - type = ctx->i32; - } else { - intr = "llvm.amdgcn.frexp.exp.i32.f64"; - type = ctx->i32; - } - - LLVMValueRef params[] = { - src0, - }; - return ac_build_intrinsic(ctx, intr, type, params, 1, - AC_FUNC_ATTR_READNONE); -} -LLVMValueRef -ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize) -{ - LLVMTypeRef type; - char *intr; - - if (bitsize == 16) { - intr = "llvm.amdgcn.frexp.mant.f16"; - type = ctx->f16; - } else if (bitsize == 32) { - intr = "llvm.amdgcn.frexp.mant.f32"; - type = 
ctx->f32; - } else { - intr = "llvm.amdgcn.frexp.mant.f64"; - type = ctx->f64; - } - - LLVMValueRef params[] = { - src0, - }; - return ac_build_intrinsic(ctx, intr, type, params, 1, - AC_FUNC_ATTR_READNONE); -} - -LLVMValueRef -ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize) -{ - LLVMTypeRef type; - char *intr; - - if (bitsize == 16) { - intr = "llvm.canonicalize.f16"; - type = ctx->f16; - } else if (bitsize == 32) { - intr = "llvm.canonicalize.f32"; - type = ctx->f32; - } else { - intr = "llvm.canonicalize.f64"; - type = ctx->f64; - } - - LLVMValueRef params[] = { - src0, - }; - return ac_build_intrinsic(ctx, intr, type, params, 1, - AC_FUNC_ATTR_READNONE); + ac_build_wg_scan_top(ctx, ws); + ac_build_s_barrier(ctx); + ac_build_wg_scan_bottom(ctx, ws); +} + +LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0, + unsigned lane1, unsigned lane2, unsigned lane3) +{ + unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3); + if (ctx->chip_class >= GFX8) { + return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false); + } else { + return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask); + } +} + +LLVMValueRef ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index) +{ + LLVMTypeRef type = LLVMTypeOf(src); + LLVMValueRef result; + + index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); + src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); + + result = + ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, (LLVMValueRef[]){index, src}, 2, + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); + return LLVMBuildTrunc(ctx->builder, result, type, ""); +} + +LLVMValueRef ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) +{ + LLVMTypeRef type; + char *intr; + + if (bitsize == 16) { + intr = "llvm.amdgcn.frexp.exp.i16.f16"; + type = ctx->i16; + } else if (bitsize == 32) { + intr = 
"llvm.amdgcn.frexp.exp.i32.f32"; + type = ctx->i32; + } else { + intr = "llvm.amdgcn.frexp.exp.i32.f64"; + type = ctx->i32; + } + + LLVMValueRef params[] = { + src0, + }; + return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE); +} +LLVMValueRef ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) +{ + LLVMTypeRef type; + char *intr; + + if (bitsize == 16) { + intr = "llvm.amdgcn.frexp.mant.f16"; + type = ctx->f16; + } else if (bitsize == 32) { + intr = "llvm.amdgcn.frexp.mant.f32"; + type = ctx->f32; + } else { + intr = "llvm.amdgcn.frexp.mant.f64"; + type = ctx->f64; + } + + LLVMValueRef params[] = { + src0, + }; + return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE); +} + +LLVMValueRef ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) +{ + LLVMTypeRef type; + char *intr; + + if (bitsize == 16) { + intr = "llvm.canonicalize.f16"; + type = ctx->f16; + } else if (bitsize == 32) { + intr = "llvm.canonicalize.f32"; + type = ctx->f32; + } else { + intr = "llvm.canonicalize.f64"; + type = ctx->f64; + } + + LLVMValueRef params[] = { + src0, + }; + return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE); } /* @@ -4693,124 +4280,111 @@ * and works out the X and Y derivatives. * it returns DDX(I), DDX(J), DDY(I), DDY(J). 
*/ -LLVMValueRef -ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij) +LLVMValueRef ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij) { - LLVMValueRef result[4], a; - unsigned i; + LLVMValueRef result[4], a; + unsigned i; - for (i = 0; i < 2; i++) { - a = LLVMBuildExtractElement(ctx->builder, interp_ij, - LLVMConstInt(ctx->i32, i, false), ""); - result[i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, a); - result[2+i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, a); - } - return ac_build_gather_values(ctx, result, 4); -} - -LLVMValueRef -ac_build_load_helper_invocation(struct ac_llvm_context *ctx) -{ - LLVMValueRef result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", - ctx->i1, NULL, 0, - AC_FUNC_ATTR_READNONE); - result = LLVMBuildNot(ctx->builder, result, ""); - return LLVMBuildSExt(ctx->builder, result, ctx->i32, ""); -} - -LLVMValueRef -ac_build_is_helper_invocation(struct ac_llvm_context *ctx) -{ - if (!ctx->postponed_kill) - return ac_build_load_helper_invocation(ctx); - - /* !(exact && postponed) */ - LLVMValueRef exact = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", - ctx->i1, NULL, 0, - AC_FUNC_ATTR_READNONE); - - LLVMValueRef postponed = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, ""); - LLVMValueRef result = LLVMBuildAnd(ctx->builder, exact, postponed, ""); - - return LLVMBuildSelect(ctx->builder, result, ctx->i32_0, - LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), ""); -} - -LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, - LLVMValueRef *args, unsigned num_args) -{ - LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, ""); - LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func)); - return ret; -} - -void -ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, - LLVMValueRef stencil, LLVMValueRef samplemask, - struct ac_export_args *args) -{ - unsigned mask = 0; - unsigned format = ac_get_spi_shader_z_format(depth != NULL, - stencil != NULL, 
- samplemask != NULL); - - assert(depth || stencil || samplemask); - - memset(args, 0, sizeof(*args)); - - args->valid_mask = 1; /* whether the EXEC mask is valid */ - args->done = 1; /* DONE bit */ - - /* Specify the target we are exporting */ - args->target = V_008DFC_SQ_EXP_MRTZ; - - args->compr = 0; /* COMP flag */ - args->out[0] = LLVMGetUndef(ctx->f32); /* R, depth */ - args->out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */ - args->out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */ - args->out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */ - - if (format == V_028710_SPI_SHADER_UINT16_ABGR) { - assert(!depth); - args->compr = 1; /* COMPR flag */ - - if (stencil) { - /* Stencil should be in X[23:16]. */ - stencil = ac_to_integer(ctx, stencil); - stencil = LLVMBuildShl(ctx->builder, stencil, - LLVMConstInt(ctx->i32, 16, 0), ""); - args->out[0] = ac_to_float(ctx, stencil); - mask |= 0x3; - } - if (samplemask) { - /* SampleMask should be in Y[15:0]. */ - args->out[1] = samplemask; - mask |= 0xc; - } - } else { - if (depth) { - args->out[0] = depth; - mask |= 0x1; - } - if (stencil) { - args->out[1] = stencil; - mask |= 0x2; - } - if (samplemask) { - args->out[2] = samplemask; - mask |= 0x4; - } - } - - /* GFX6 (except OLAND and HAINAN) has a bug that it only looks - * at the X writemask component. 
*/ - if (ctx->chip_class == GFX6 && - ctx->family != CHIP_OLAND && - ctx->family != CHIP_HAINAN) - mask |= 0x1; + for (i = 0; i < 2; i++) { + a = LLVMBuildExtractElement(ctx->builder, interp_ij, LLVMConstInt(ctx->i32, i, false), ""); + result[i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, a); + result[2 + i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, a); + } + return ac_build_gather_values(ctx, result, 4); +} + +LLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx) +{ + LLVMValueRef result = + ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0, AC_FUNC_ATTR_READNONE); + result = LLVMBuildNot(ctx->builder, result, ""); + return LLVMBuildSExt(ctx->builder, result, ctx->i32, ""); +} + +LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx) +{ + if (!ctx->postponed_kill) + return ac_build_load_helper_invocation(ctx); + + /* !(exact && postponed) */ + LLVMValueRef exact = + ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0, AC_FUNC_ATTR_READNONE); + + LLVMValueRef postponed = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, ""); + LLVMValueRef result = LLVMBuildAnd(ctx->builder, exact, postponed, ""); + + return LLVMBuildSelect(ctx->builder, result, ctx->i32_0, + LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), ""); +} + +LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args, + unsigned num_args) +{ + LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, ""); + LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func)); + return ret; +} + +void ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueRef stencil, + LLVMValueRef samplemask, struct ac_export_args *args) +{ + unsigned mask = 0; + unsigned format = ac_get_spi_shader_z_format(depth != NULL, stencil != NULL, samplemask != NULL); + + assert(depth || stencil || samplemask); + + memset(args, 0, sizeof(*args)); + + args->valid_mask = 1; /* whether the EXEC mask is valid */ + 
args->done = 1; /* DONE bit */ + + /* Specify the target we are exporting */ + args->target = V_008DFC_SQ_EXP_MRTZ; + + args->compr = 0; /* COMP flag */ + args->out[0] = LLVMGetUndef(ctx->f32); /* R, depth */ + args->out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */ + args->out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */ + args->out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */ + + if (format == V_028710_SPI_SHADER_UINT16_ABGR) { + assert(!depth); + args->compr = 1; /* COMPR flag */ + + if (stencil) { + /* Stencil should be in X[23:16]. */ + stencil = ac_to_integer(ctx, stencil); + stencil = LLVMBuildShl(ctx->builder, stencil, LLVMConstInt(ctx->i32, 16, 0), ""); + args->out[0] = ac_to_float(ctx, stencil); + mask |= 0x3; + } + if (samplemask) { + /* SampleMask should be in Y[15:0]. */ + args->out[1] = samplemask; + mask |= 0xc; + } + } else { + if (depth) { + args->out[0] = depth; + mask |= 0x1; + } + if (stencil) { + args->out[1] = stencil; + mask |= 0x2; + } + if (samplemask) { + args->out[2] = samplemask; + mask |= 0x4; + } + } + + /* GFX6 (except OLAND and HAINAN) has a bug that it only looks + * at the X writemask component. */ + if (ctx->chip_class == GFX6 && ctx->family != CHIP_OLAND && ctx->family != CHIP_HAINAN) + mask |= 0x1; - /* Specify which components to enable */ - args->enabled_channels = mask; + /* Specify which components to enable */ + args->enabled_channels = mask; } /* Send GS Alloc Req message from the first wave of the group to SPI. @@ -4819,217 +4393,204 @@ * - bits 12..22: primitives in group */ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id, - LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt) + LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt) { - LLVMBuilderRef builder = ctx->builder; - LLVMValueRef tmp; - bool export_dummy_prim = false; - - /* HW workaround for a GPU hang with 100% culling. - * We always have to export at least 1 primitive. 
- * Export a degenerate triangle using vertex 0 for all 3 vertices. - */ - if (prim_cnt == ctx->i32_0 && ctx->chip_class == GFX10) { - assert(vtx_cnt == ctx->i32_0); - prim_cnt = ctx->i32_1; - vtx_cnt = ctx->i32_1; - export_dummy_prim = true; - } - - ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020); - - tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->i32, 12, false),""); - tmp = LLVMBuildOr(builder, tmp, vtx_cnt, ""); - ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp); - - if (export_dummy_prim) { - struct ac_ngg_prim prim = {}; - /* The vertex indices are 0,0,0. */ - prim.passthrough = ctx->i32_0; - - struct ac_export_args pos = {}; - pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = ctx->f32_0; - pos.target = V_008DFC_SQ_EXP_POS; - pos.enabled_channels = 0xf; - pos.done = true; - - ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx), - ctx->i32_0, ""), 5021); - ac_build_export_prim(ctx, &prim); - ac_build_export(ctx, &pos); - ac_build_endif(ctx, 5021); - } - - ac_build_endif(ctx, 5020); -} - -LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, - const struct ac_ngg_prim *prim) -{ - /* The prim export format is: - * - bits 0..8: index 0 - * - bit 9: edge flag 0 - * - bits 10..18: index 1 - * - bit 19: edge flag 1 - * - bits 20..28: index 2 - * - bit 29: edge flag 2 - * - bit 31: null primitive (skip) - */ - LLVMBuilderRef builder = ctx->builder; - LLVMValueRef tmp = LLVMBuildZExt(builder, prim->isnull, ctx->i32, ""); - LLVMValueRef result = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->i32, 31, false), ""); - - for (unsigned i = 0; i < prim->num_vertices; ++i) { - tmp = LLVMBuildShl(builder, prim->index[i], - LLVMConstInt(ctx->i32, 10 * i, false), ""); - result = LLVMBuildOr(builder, result, tmp, ""); - tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->i32, ""); - tmp = LLVMBuildShl(builder, tmp, - LLVMConstInt(ctx->i32, 10 * i + 9, false), ""); - result = LLVMBuildOr(builder, 
result, tmp, ""); - } - return result; -} - -void ac_build_export_prim(struct ac_llvm_context *ctx, - const struct ac_ngg_prim *prim) -{ - struct ac_export_args args; - - if (prim->passthrough) { - args.out[0] = prim->passthrough; - } else { - args.out[0] = ac_pack_prim_export(ctx, prim); - } - - args.out[0] = LLVMBuildBitCast(ctx->builder, args.out[0], ctx->f32, ""); - args.out[1] = LLVMGetUndef(ctx->f32); - args.out[2] = LLVMGetUndef(ctx->f32); - args.out[3] = LLVMGetUndef(ctx->f32); - - args.target = V_008DFC_SQ_EXP_PRIM; - args.enabled_channels = 1; - args.done = true; - args.valid_mask = false; - args.compr = false; - - ac_build_export(ctx, &args); -} - -static LLVMTypeRef -arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx) -{ - if (type == AC_ARG_FLOAT) { - return size == 1 ? ctx->f32 : LLVMVectorType(ctx->f32, size); - } else if (type == AC_ARG_INT) { - return size == 1 ? ctx->i32 : LLVMVectorType(ctx->i32, size); - } else { - LLVMTypeRef ptr_type; - switch (type) { - case AC_ARG_CONST_PTR: - ptr_type = ctx->i8; - break; - case AC_ARG_CONST_FLOAT_PTR: - ptr_type = ctx->f32; - break; - case AC_ARG_CONST_PTR_PTR: - ptr_type = ac_array_in_const32_addr_space(ctx->i8); - break; - case AC_ARG_CONST_DESC_PTR: - ptr_type = ctx->v4i32; - break; - case AC_ARG_CONST_IMAGE_PTR: - ptr_type = ctx->v8i32; - break; - default: - unreachable("unknown arg type"); - } - if (size == 1) { - return ac_array_in_const32_addr_space(ptr_type); - } else { - assert(size == 2); - return ac_array_in_const_addr_space(ptr_type); - } - } -} - -LLVMValueRef -ac_build_main(const struct ac_shader_args *args, - struct ac_llvm_context *ctx, - enum ac_llvm_calling_convention convention, - const char *name, LLVMTypeRef ret_type, - LLVMModuleRef module) -{ - LLVMTypeRef arg_types[AC_MAX_ARGS]; - - for (unsigned i = 0; i < args->arg_count; i++) { - arg_types[i] = arg_llvm_type(args->args[i].type, - args->args[i].size, ctx); - } - - LLVMTypeRef main_function_type = - 
LLVMFunctionType(ret_type, arg_types, args->arg_count, 0); - - LLVMValueRef main_function = - LLVMAddFunction(module, name, main_function_type); - LLVMBasicBlockRef main_function_body = - LLVMAppendBasicBlockInContext(ctx->context, main_function, "main_body"); - LLVMPositionBuilderAtEnd(ctx->builder, main_function_body); - - LLVMSetFunctionCallConv(main_function, convention); - for (unsigned i = 0; i < args->arg_count; ++i) { - LLVMValueRef P = LLVMGetParam(main_function, i); - - if (args->args[i].file != AC_ARG_SGPR) - continue; - - ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_INREG); - - if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) { - ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS); - ac_add_attr_dereferenceable(P, UINT64_MAX); - ac_add_attr_alignment(P, 32); - } - } - - ctx->main_function = main_function; - - if (LLVM_VERSION_MAJOR >= 11) { - /* Enable denormals for FP16 and FP64: */ - LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math", - "ieee,ieee"); - /* Disable denormals for FP32: */ - LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32", - "preserve-sign,preserve-sign"); - } - return main_function; + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef tmp; + bool export_dummy_prim = false; + + /* HW workaround for a GPU hang with 100% culling. + * We always have to export at least 1 primitive. + * Export a degenerate triangle using vertex 0 for all 3 vertices. 
+ */ + if (prim_cnt == ctx->i32_0 && ctx->chip_class == GFX10) { + assert(vtx_cnt == ctx->i32_0); + prim_cnt = ctx->i32_1; + vtx_cnt = ctx->i32_1; + export_dummy_prim = true; + } + + ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020); + + tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->i32, 12, false), ""); + tmp = LLVMBuildOr(builder, tmp, vtx_cnt, ""); + ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp); + + if (export_dummy_prim) { + struct ac_ngg_prim prim = {}; + /* The vertex indices are 0,0,0. */ + prim.passthrough = ctx->i32_0; + + struct ac_export_args pos = {}; + pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = ctx->f32_0; + pos.target = V_008DFC_SQ_EXP_POS; + pos.enabled_channels = 0xf; + pos.done = true; + + ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx), ctx->i32_0, ""), + 5021); + ac_build_export_prim(ctx, &prim); + ac_build_export(ctx, &pos); + ac_build_endif(ctx, 5021); + } + + ac_build_endif(ctx, 5020); +} + +LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim) +{ + /* The prim export format is: + * - bits 0..8: index 0 + * - bit 9: edge flag 0 + * - bits 10..18: index 1 + * - bit 19: edge flag 1 + * - bits 20..28: index 2 + * - bit 29: edge flag 2 + * - bit 31: null primitive (skip) + */ + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef tmp = LLVMBuildZExt(builder, prim->isnull, ctx->i32, ""); + LLVMValueRef result = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->i32, 31, false), ""); + + for (unsigned i = 0; i < prim->num_vertices; ++i) { + tmp = LLVMBuildShl(builder, prim->index[i], LLVMConstInt(ctx->i32, 10 * i, false), ""); + result = LLVMBuildOr(builder, result, tmp, ""); + tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->i32, ""); + tmp = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->i32, 10 * i + 9, false), ""); + result = LLVMBuildOr(builder, result, tmp, ""); + } + return result; +} + +void ac_build_export_prim(struct 
ac_llvm_context *ctx, const struct ac_ngg_prim *prim) +{ + struct ac_export_args args; + + if (prim->passthrough) { + args.out[0] = prim->passthrough; + } else { + args.out[0] = ac_pack_prim_export(ctx, prim); + } + + args.out[0] = LLVMBuildBitCast(ctx->builder, args.out[0], ctx->f32, ""); + args.out[1] = LLVMGetUndef(ctx->f32); + args.out[2] = LLVMGetUndef(ctx->f32); + args.out[3] = LLVMGetUndef(ctx->f32); + + args.target = V_008DFC_SQ_EXP_PRIM; + args.enabled_channels = 1; + args.done = true; + args.valid_mask = false; + args.compr = false; + + ac_build_export(ctx, &args); +} + +static LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx) +{ + if (type == AC_ARG_FLOAT) { + return size == 1 ? ctx->f32 : LLVMVectorType(ctx->f32, size); + } else if (type == AC_ARG_INT) { + return size == 1 ? ctx->i32 : LLVMVectorType(ctx->i32, size); + } else { + LLVMTypeRef ptr_type; + switch (type) { + case AC_ARG_CONST_PTR: + ptr_type = ctx->i8; + break; + case AC_ARG_CONST_FLOAT_PTR: + ptr_type = ctx->f32; + break; + case AC_ARG_CONST_PTR_PTR: + ptr_type = ac_array_in_const32_addr_space(ctx->i8); + break; + case AC_ARG_CONST_DESC_PTR: + ptr_type = ctx->v4i32; + break; + case AC_ARG_CONST_IMAGE_PTR: + ptr_type = ctx->v8i32; + break; + default: + unreachable("unknown arg type"); + } + if (size == 1) { + return ac_array_in_const32_addr_space(ptr_type); + } else { + assert(size == 2); + return ac_array_in_const_addr_space(ptr_type); + } + } +} + +LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx, + enum ac_llvm_calling_convention convention, const char *name, + LLVMTypeRef ret_type, LLVMModuleRef module) +{ + LLVMTypeRef arg_types[AC_MAX_ARGS]; + + for (unsigned i = 0; i < args->arg_count; i++) { + arg_types[i] = arg_llvm_type(args->args[i].type, args->args[i].size, ctx); + } + + LLVMTypeRef main_function_type = LLVMFunctionType(ret_type, arg_types, args->arg_count, 0); + + LLVMValueRef main_function = 
LLVMAddFunction(module, name, main_function_type); + LLVMBasicBlockRef main_function_body = + LLVMAppendBasicBlockInContext(ctx->context, main_function, "main_body"); + LLVMPositionBuilderAtEnd(ctx->builder, main_function_body); + + LLVMSetFunctionCallConv(main_function, convention); + for (unsigned i = 0; i < args->arg_count; ++i) { + LLVMValueRef P = LLVMGetParam(main_function, i); + + if (args->args[i].file != AC_ARG_SGPR) + continue; + + ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_INREG); + + if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) { + ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS); + ac_add_attr_dereferenceable(P, UINT64_MAX); + ac_add_attr_alignment(P, 32); + } + } + + ctx->main_function = main_function; + + if (LLVM_VERSION_MAJOR >= 11) { + /* Enable denormals for FP16 and FP64: */ + LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math", "ieee,ieee"); + /* Disable denormals for FP32: */ + LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32", + "preserve-sign,preserve-sign"); + } + return main_function; } void ac_build_s_endpgm(struct ac_llvm_context *ctx) { - LLVMTypeRef calltype = LLVMFunctionType(ctx->voidt, NULL, 0, false); - LLVMValueRef code = LLVMConstInlineAsm(calltype, "s_endpgm", "", true, false); - LLVMBuildCall(ctx->builder, code, NULL, 0, ""); -} - -LLVMValueRef ac_prefix_bitcount(struct ac_llvm_context *ctx, - LLVMValueRef mask, LLVMValueRef index) -{ - LLVMBuilderRef builder = ctx->builder; - LLVMTypeRef type = LLVMTypeOf(mask); - - LLVMValueRef bit = LLVMBuildShl(builder, LLVMConstInt(type, 1, 0), - LLVMBuildZExt(builder, index, type, ""), ""); - LLVMValueRef prefix_bits = LLVMBuildSub(builder, bit, LLVMConstInt(type, 1, 0), ""); - LLVMValueRef prefix_mask = LLVMBuildAnd(builder, mask, prefix_bits, ""); - return ac_build_bit_count(ctx, prefix_mask); + LLVMTypeRef calltype = LLVMFunctionType(ctx->voidt, NULL, 0, false); + LLVMValueRef 
code = LLVMConstInlineAsm(calltype, "s_endpgm", "", true, false); + LLVMBuildCall(ctx->builder, code, NULL, 0, ""); +} + +LLVMValueRef ac_prefix_bitcount(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef index) +{ + LLVMBuilderRef builder = ctx->builder; + LLVMTypeRef type = LLVMTypeOf(mask); + + LLVMValueRef bit = + LLVMBuildShl(builder, LLVMConstInt(type, 1, 0), LLVMBuildZExt(builder, index, type, ""), ""); + LLVMValueRef prefix_bits = LLVMBuildSub(builder, bit, LLVMConstInt(type, 1, 0), ""); + LLVMValueRef prefix_mask = LLVMBuildAnd(builder, mask, prefix_bits, ""); + return ac_build_bit_count(ctx, prefix_mask); } /* Compute the prefix sum of the "mask" bit array with 128 elements (bits). */ -LLVMValueRef ac_prefix_bitcount_2x64(struct ac_llvm_context *ctx, - LLVMValueRef mask[2], LLVMValueRef index) +LLVMValueRef ac_prefix_bitcount_2x64(struct ac_llvm_context *ctx, LLVMValueRef mask[2], + LLVMValueRef index) { - LLVMBuilderRef builder = ctx->builder; + LLVMBuilderRef builder = ctx->builder; #if 0 /* Reference version using i128. */ LLVMValueRef input_mask = @@ -5037,37 +4598,37 @@ return ac_prefix_bitcount(ctx, input_mask, index); #else - /* Optimized version using 2 64-bit masks. */ - LLVMValueRef is_hi, is_0, c64, c128, all_bits; - LLVMValueRef prefix_mask[2], shift[2], mask_bcnt0, prefix_bcnt[2]; - - /* Compute the 128-bit prefix mask. */ - c64 = LLVMConstInt(ctx->i32, 64, 0); - c128 = LLVMConstInt(ctx->i32, 128, 0); - all_bits = LLVMConstInt(ctx->i64, UINT64_MAX, 0); - /* The first index that can have non-zero high bits in the prefix mask is 65. */ - is_hi = LLVMBuildICmp(builder, LLVMIntUGT, index, c64, ""); - is_0 = LLVMBuildICmp(builder, LLVMIntEQ, index, ctx->i32_0, ""); - mask_bcnt0 = ac_build_bit_count(ctx, mask[0]); - - for (unsigned i = 0; i < 2; i++) { - shift[i] = LLVMBuildSub(builder, i ? c128 : c64, index, ""); - /* For i==0, index==0, the right shift by 64 doesn't give the desired result, - * so we handle it by the is_0 select. 
- * For i==1, index==64, same story, so we handle it by the last is_hi select. - * For i==0, index==64, we shift by 0, which is what we want. - */ - prefix_mask[i] = LLVMBuildLShr(builder, all_bits, - LLVMBuildZExt(builder, shift[i], ctx->i64, ""), ""); - prefix_mask[i] = LLVMBuildAnd(builder, mask[i], prefix_mask[i], ""); - prefix_bcnt[i] = ac_build_bit_count(ctx, prefix_mask[i]); - } - - prefix_bcnt[0] = LLVMBuildSelect(builder, is_0, ctx->i32_0, prefix_bcnt[0], ""); - prefix_bcnt[0] = LLVMBuildSelect(builder, is_hi, mask_bcnt0, prefix_bcnt[0], ""); - prefix_bcnt[1] = LLVMBuildSelect(builder, is_hi, prefix_bcnt[1], ctx->i32_0, ""); + /* Optimized version using 2 64-bit masks. */ + LLVMValueRef is_hi, is_0, c64, c128, all_bits; + LLVMValueRef prefix_mask[2], shift[2], mask_bcnt0, prefix_bcnt[2]; + + /* Compute the 128-bit prefix mask. */ + c64 = LLVMConstInt(ctx->i32, 64, 0); + c128 = LLVMConstInt(ctx->i32, 128, 0); + all_bits = LLVMConstInt(ctx->i64, UINT64_MAX, 0); + /* The first index that can have non-zero high bits in the prefix mask is 65. */ + is_hi = LLVMBuildICmp(builder, LLVMIntUGT, index, c64, ""); + is_0 = LLVMBuildICmp(builder, LLVMIntEQ, index, ctx->i32_0, ""); + mask_bcnt0 = ac_build_bit_count(ctx, mask[0]); + + for (unsigned i = 0; i < 2; i++) { + shift[i] = LLVMBuildSub(builder, i ? c128 : c64, index, ""); + /* For i==0, index==0, the right shift by 64 doesn't give the desired result, + * so we handle it by the is_0 select. + * For i==1, index==64, same story, so we handle it by the last is_hi select. + * For i==0, index==64, we shift by 0, which is what we want. 
+ */ + prefix_mask[i] = + LLVMBuildLShr(builder, all_bits, LLVMBuildZExt(builder, shift[i], ctx->i64, ""), ""); + prefix_mask[i] = LLVMBuildAnd(builder, mask[i], prefix_mask[i], ""); + prefix_bcnt[i] = ac_build_bit_count(ctx, prefix_mask[i]); + } + + prefix_bcnt[0] = LLVMBuildSelect(builder, is_0, ctx->i32_0, prefix_bcnt[0], ""); + prefix_bcnt[0] = LLVMBuildSelect(builder, is_hi, mask_bcnt0, prefix_bcnt[0], ""); + prefix_bcnt[1] = LLVMBuildSelect(builder, is_hi, prefix_bcnt[1], ctx->i32_0, ""); - return LLVMBuildAdd(builder, prefix_bcnt[0], prefix_bcnt[1], ""); + return LLVMBuildAdd(builder, prefix_bcnt[0], prefix_bcnt[1], ""); #endif } @@ -5075,33 +4636,26 @@ * Convert triangle strip indices to triangle indices. This is used to decompose * triangle strips into triangles. */ -void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, - LLVMValueRef is_odd, - LLVMValueRef flatshade_first, - LLVMValueRef index[3]) -{ - LLVMBuilderRef builder = ctx->builder; - LLVMValueRef out[3]; - - /* We need to change the vertex order for odd triangles to get correct - * front/back facing by swapping 2 vertex indices, but we also have to - * keep the provoking vertex in the same place. - * - * If the first vertex is provoking, swap index 1 and 2. - * If the last vertex is provoking, swap index 0 and 1. 
- */ - out[0] = LLVMBuildSelect(builder, flatshade_first, - index[0], - LLVMBuildSelect(builder, is_odd, - index[1], index[0], ""), ""); - out[1] = LLVMBuildSelect(builder, flatshade_first, - LLVMBuildSelect(builder, is_odd, - index[2], index[1], ""), - LLVMBuildSelect(builder, is_odd, - index[0], index[1], ""), ""); - out[2] = LLVMBuildSelect(builder, flatshade_first, - LLVMBuildSelect(builder, is_odd, - index[1], index[2], ""), - index[2], ""); - memcpy(index, out, sizeof(out)); +void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LLVMValueRef is_odd, + LLVMValueRef flatshade_first, + LLVMValueRef index[3]) +{ + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef out[3]; + + /* We need to change the vertex order for odd triangles to get correct + * front/back facing by swapping 2 vertex indices, but we also have to + * keep the provoking vertex in the same place. + * + * If the first vertex is provoking, swap index 1 and 2. + * If the last vertex is provoking, swap index 0 and 1. 
+ */ + out[0] = LLVMBuildSelect(builder, flatshade_first, index[0], + LLVMBuildSelect(builder, is_odd, index[1], index[0], ""), ""); + out[1] = LLVMBuildSelect(builder, flatshade_first, + LLVMBuildSelect(builder, is_odd, index[2], index[1], ""), + LLVMBuildSelect(builder, is_odd, index[0], index[1], ""), ""); + out[2] = LLVMBuildSelect(builder, flatshade_first, + LLVMBuildSelect(builder, is_odd, index[1], index[2], ""), index[2], ""); + memcpy(index, out, sizeof(out)); } diff -Nru mesa-20.2.1/src/amd/llvm/ac_llvm_build.h mesa-20.2.6/src/amd/llvm/ac_llvm_build.h --- mesa-20.2.1/src/amd/llvm/ac_llvm_build.h 2020-10-14 17:19:10.293182800 +0000 +++ mesa-20.2.6/src/amd/llvm/ac_llvm_build.h 2020-12-16 21:42:03.543110000 +0000 @@ -25,140 +25,134 @@ #ifndef AC_LLVM_BUILD_H #define AC_LLVM_BUILD_H -#include -#include -#include "compiler/nir/nir.h" -#include "amd_family.h" -#include "ac_shader_util.h" -#include "ac_shader_args.h" #include "ac_shader_abi.h" +#include "ac_shader_args.h" +#include "ac_shader_util.h" +#include "amd_family.h" +#include "compiler/nir/nir.h" +#include + +#include #ifdef __cplusplus extern "C" { #endif -enum { - AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */ - AC_ADDR_SPACE_GLOBAL = 1, - AC_ADDR_SPACE_GDS = 2, - AC_ADDR_SPACE_LDS = 3, - AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */ - AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */ +enum +{ + AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */ + AC_ADDR_SPACE_GLOBAL = 1, + AC_ADDR_SPACE_GDS = 2, + AC_ADDR_SPACE_LDS = 3, + AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. 
*/ + AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */ }; -#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */ -#define AC_WAIT_VLOAD (1 << 1) /* VMEM load/sample instructions */ -#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */ +#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */ +#define AC_WAIT_VLOAD (1 << 1) /* VMEM load/sample instructions */ +#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */ struct ac_llvm_flow; struct ac_llvm_compiler; enum ac_float_mode; struct ac_llvm_flow_state { - struct ac_llvm_flow *stack; - unsigned depth_max; - unsigned depth; + struct ac_llvm_flow *stack; + unsigned depth_max; + unsigned depth; }; struct ac_llvm_context { - LLVMContextRef context; - LLVMModuleRef module; - LLVMBuilderRef builder; - - LLVMValueRef main_function; - - LLVMTypeRef voidt; - LLVMTypeRef i1; - LLVMTypeRef i8; - LLVMTypeRef i16; - LLVMTypeRef i32; - LLVMTypeRef i64; - LLVMTypeRef i128; - LLVMTypeRef intptr; - LLVMTypeRef f16; - LLVMTypeRef f32; - LLVMTypeRef f64; - LLVMTypeRef v2i16; - LLVMTypeRef v4i16; - LLVMTypeRef v2f16; - LLVMTypeRef v4f16; - LLVMTypeRef v2i32; - LLVMTypeRef v3i32; - LLVMTypeRef v4i32; - LLVMTypeRef v2f32; - LLVMTypeRef v3f32; - LLVMTypeRef v4f32; - LLVMTypeRef v8i32; - LLVMTypeRef iN_wavemask; - LLVMTypeRef iN_ballotmask; - - LLVMValueRef i8_0; - LLVMValueRef i8_1; - LLVMValueRef i16_0; - LLVMValueRef i16_1; - LLVMValueRef i32_0; - LLVMValueRef i32_1; - LLVMValueRef i64_0; - LLVMValueRef i64_1; - LLVMValueRef i128_0; - LLVMValueRef i128_1; - LLVMValueRef f16_0; - LLVMValueRef f16_1; - LLVMValueRef f32_0; - LLVMValueRef f32_1; - LLVMValueRef f64_0; - LLVMValueRef f64_1; - LLVMValueRef i1true; - LLVMValueRef i1false; - - /* Temporary helper to implement demote_to_helper: - * True = live lanes - * False = demoted lanes - */ - LLVMValueRef postponed_kill; - - /* Since ac_nir_translate makes a local copy of ac_llvm_context, there - * are two 
ac_llvm_contexts. Declare a pointer here, so that the control - * flow stack is shared by both ac_llvm_contexts. - */ - struct ac_llvm_flow_state *flow; - - unsigned range_md_kind; - unsigned invariant_load_md_kind; - unsigned uniform_md_kind; - LLVMValueRef empty_md; - - enum chip_class chip_class; - enum radeon_family family; - - unsigned wave_size; - unsigned ballot_mask_bits; - - unsigned float_mode; - - LLVMValueRef lds; -}; - -void -ac_llvm_context_init(struct ac_llvm_context *ctx, - struct ac_llvm_compiler *compiler, - enum chip_class chip_class, enum radeon_family family, - enum ac_float_mode float_mode, unsigned wave_size, - unsigned ballot_mask_bits); - -void -ac_llvm_context_dispose(struct ac_llvm_context *ctx); - -int -ac_get_llvm_num_components(LLVMValueRef value); - -int -ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type); - -LLVMValueRef -ac_llvm_extract_elem(struct ac_llvm_context *ac, - LLVMValueRef value, - int index); + LLVMContextRef context; + LLVMModuleRef module; + LLVMBuilderRef builder; + + LLVMValueRef main_function; + + LLVMTypeRef voidt; + LLVMTypeRef i1; + LLVMTypeRef i8; + LLVMTypeRef i16; + LLVMTypeRef i32; + LLVMTypeRef i64; + LLVMTypeRef i128; + LLVMTypeRef intptr; + LLVMTypeRef f16; + LLVMTypeRef f32; + LLVMTypeRef f64; + LLVMTypeRef v2i16; + LLVMTypeRef v4i16; + LLVMTypeRef v2f16; + LLVMTypeRef v4f16; + LLVMTypeRef v2i32; + LLVMTypeRef v3i32; + LLVMTypeRef v4i32; + LLVMTypeRef v2f32; + LLVMTypeRef v3f32; + LLVMTypeRef v4f32; + LLVMTypeRef v8i32; + LLVMTypeRef iN_wavemask; + LLVMTypeRef iN_ballotmask; + + LLVMValueRef i8_0; + LLVMValueRef i8_1; + LLVMValueRef i16_0; + LLVMValueRef i16_1; + LLVMValueRef i32_0; + LLVMValueRef i32_1; + LLVMValueRef i64_0; + LLVMValueRef i64_1; + LLVMValueRef i128_0; + LLVMValueRef i128_1; + LLVMValueRef f16_0; + LLVMValueRef f16_1; + LLVMValueRef f32_0; + LLVMValueRef f32_1; + LLVMValueRef f64_0; + LLVMValueRef f64_1; + LLVMValueRef i1true; + LLVMValueRef i1false; + + /* Temporary helper 
to implement demote_to_helper: + * True = live lanes + * False = demoted lanes + */ + LLVMValueRef postponed_kill; + + /* Since ac_nir_translate makes a local copy of ac_llvm_context, there + * are two ac_llvm_contexts. Declare a pointer here, so that the control + * flow stack is shared by both ac_llvm_contexts. + */ + struct ac_llvm_flow_state *flow; + + unsigned range_md_kind; + unsigned invariant_load_md_kind; + unsigned uniform_md_kind; + LLVMValueRef empty_md; + + enum chip_class chip_class; + enum radeon_family family; + + unsigned wave_size; + unsigned ballot_mask_bits; + + unsigned float_mode; + + LLVMValueRef lds; +}; + +void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler, + enum chip_class chip_class, enum radeon_family family, + enum ac_float_mode float_mode, unsigned wave_size, + unsigned ballot_mask_bits); + +void ac_llvm_context_dispose(struct ac_llvm_context *ctx); + +int ac_get_llvm_num_components(LLVMValueRef value); + +int ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type); + +LLVMValueRef ac_llvm_extract_elem(struct ac_llvm_context *ac, LLVMValueRef value, int index); unsigned ac_get_type_size(LLVMTypeRef type); @@ -168,28 +162,22 @@ LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t); LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v); -LLVMValueRef -ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name, - LLVMTypeRef return_type, LLVMValueRef *params, - unsigned param_count, unsigned attrib_mask); +LLVMValueRef ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name, + LLVMTypeRef return_type, LLVMValueRef *params, unsigned param_count, + unsigned attrib_mask); void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize); -LLVMValueRef -ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, - unsigned count_incoming, LLVMValueRef *values, - LLVMBasicBlockRef *blocks); +LLVMValueRef ac_build_phi(struct 
ac_llvm_context *ctx, LLVMTypeRef type, unsigned count_incoming, + LLVMValueRef *values, LLVMBasicBlockRef *blocks); void ac_build_s_barrier(struct ac_llvm_context *ctx); -void ac_build_optimization_barrier(struct ac_llvm_context *ctx, - LLVMValueRef *pvgpr); +void ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pvgpr); -LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx, - nir_scope scope); +LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx, nir_scope scope); LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value); -LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx, - LLVMValueRef value); +LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx, LLVMValueRef value); LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value); @@ -197,276 +185,153 @@ LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value); -LLVMValueRef -ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, - unsigned value_count, unsigned component); - -LLVMValueRef -ac_build_gather_values_extended(struct ac_llvm_context *ctx, - LLVMValueRef *values, - unsigned value_count, - unsigned value_stride, - bool load, - bool always_vector); -LLVMValueRef -ac_build_gather_values(struct ac_llvm_context *ctx, - LLVMValueRef *values, - unsigned value_count); - -LLVMValueRef -ac_extract_components(struct ac_llvm_context *ctx, - LLVMValueRef value, - unsigned start, - unsigned channels); - -LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, - LLVMValueRef value, - unsigned num_channels); +LLVMValueRef ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count, unsigned component); + +LLVMValueRef ac_build_gather_values_extended(struct ac_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count, unsigned value_stride, bool load, + bool always_vector); +LLVMValueRef 
ac_build_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count); + +LLVMValueRef ac_extract_components(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned start, + unsigned channels); + +LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, LLVMValueRef value, + unsigned num_channels); LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value); -LLVMValueRef -ac_build_fdiv(struct ac_llvm_context *ctx, - LLVMValueRef num, - LLVMValueRef den); - -LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, - LLVMValueRef num, - LLVMValueRef multiplier, - LLVMValueRef pre_shift, - LLVMValueRef post_shift, - LLVMValueRef increment); -LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, - LLVMValueRef num, - LLVMValueRef multiplier, - LLVMValueRef pre_shift, - LLVMValueRef post_shift, - LLVMValueRef increment); -LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, - LLVMValueRef num, - LLVMValueRef multiplier, - LLVMValueRef post_shift); - -void -ac_prepare_cube_coords(struct ac_llvm_context *ctx, - bool is_deriv, bool is_array, bool is_lod, - LLVMValueRef *coords_arg, - LLVMValueRef *derivs_arg); - - -LLVMValueRef -ac_build_fs_interp(struct ac_llvm_context *ctx, - LLVMValueRef llvm_chan, - LLVMValueRef attr_number, - LLVMValueRef params, - LLVMValueRef i, - LLVMValueRef j); - -LLVMValueRef -ac_build_fs_interp_f16(struct ac_llvm_context *ctx, - LLVMValueRef llvm_chan, - LLVMValueRef attr_number, - LLVMValueRef params, - LLVMValueRef i, - LLVMValueRef j); - -LLVMValueRef -ac_build_fs_interp_mov(struct ac_llvm_context *ctx, - LLVMValueRef parameter, - LLVMValueRef llvm_chan, - LLVMValueRef attr_number, - LLVMValueRef params); - -LLVMValueRef -ac_build_gep_ptr(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, - LLVMValueRef index); - -LLVMValueRef -ac_build_gep0(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, - LLVMValueRef index); +LLVMValueRef 
ac_build_fdiv(struct ac_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den); + +LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, LLVMValueRef num, + LLVMValueRef multiplier, LLVMValueRef pre_shift, + LLVMValueRef post_shift, LLVMValueRef increment); +LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, LLVMValueRef num, + LLVMValueRef multiplier, LLVMValueRef pre_shift, + LLVMValueRef post_shift, LLVMValueRef increment); +LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMValueRef num, + LLVMValueRef multiplier, LLVMValueRef post_shift); + +void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod, + LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg); + +LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan, + LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i, + LLVMValueRef j); + +LLVMValueRef ac_build_fs_interp_f16(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan, + LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i, + LLVMValueRef j); + +LLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef parameter, + LLVMValueRef llvm_chan, LLVMValueRef attr_number, + LLVMValueRef params); + +LLVMValueRef ac_build_gep_ptr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, + LLVMValueRef index); + +LLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index); LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr, - LLVMValueRef index); + LLVMValueRef index); + +void ac_build_indexed_store(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index, + LLVMValueRef value); -void -ac_build_indexed_store(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index, - LLVMValueRef value); - -LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, - LLVMValueRef index); -LLVMValueRef 
ac_build_load_invariant(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index); -LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index); +LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index); +LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, + LLVMValueRef index); +LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, + LLVMValueRef index); LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index); + LLVMValueRef base_ptr, LLVMValueRef index); -void -ac_build_buffer_store_dword(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - unsigned num_channels, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned inst_offset, - unsigned cache_policy); - -void -ac_build_buffer_store_format(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef data, - LLVMValueRef vindex, - LLVMValueRef voffset, - unsigned cache_policy); - -LLVMValueRef -ac_build_buffer_load(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - int num_channels, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned inst_offset, - unsigned cache_policy, - bool can_speculate, - bool allow_smem); - -LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vindex, - LLVMValueRef voffset, - unsigned num_channels, - unsigned cache_policy, - bool can_speculate, - bool d16); - -LLVMValueRef -ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned cache_policy); - -LLVMValueRef -ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned 
cache_policy); - -LLVMValueRef -ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy, - bool can_speculate); - -LLVMValueRef -ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy, - bool can_speculate); +void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, + unsigned num_channels, LLVMValueRef voffset, LLVMValueRef soffset, + unsigned inst_offset, unsigned cache_policy); + +void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data, + LLVMValueRef vindex, LLVMValueRef voffset, unsigned cache_policy); + +LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels, + LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, + unsigned inst_offset, unsigned cache_policy, bool can_speculate, + bool allow_smem); + +LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vindex, LLVMValueRef voffset, + unsigned num_channels, unsigned cache_policy, + bool can_speculate, bool d16); + +LLVMValueRef ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef voffset, LLVMValueRef soffset, + LLVMValueRef immoffset, unsigned cache_policy); + +LLVMValueRef ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef voffset, LLVMValueRef soffset, + LLVMValueRef immoffset, unsigned cache_policy); + +LLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vindex, LLVMValueRef voffset, + LLVMValueRef soffset, 
LLVMValueRef immoffset, + unsigned num_channels, unsigned dfmt, unsigned nfmt, + unsigned cache_policy, bool can_speculate); + +LLVMValueRef ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef voffset, LLVMValueRef soffset, + LLVMValueRef immoffset, unsigned num_channels, unsigned dfmt, + unsigned nfmt, unsigned cache_policy, bool can_speculate); /* For ac_build_fetch_format. * * Note: FLOAT must be 0 (used for convenience of encoding in radeonsi). */ -enum { - AC_FETCH_FORMAT_FLOAT = 0, - AC_FETCH_FORMAT_FIXED, - AC_FETCH_FORMAT_UNORM, - AC_FETCH_FORMAT_SNORM, - AC_FETCH_FORMAT_USCALED, - AC_FETCH_FORMAT_SSCALED, - AC_FETCH_FORMAT_UINT, - AC_FETCH_FORMAT_SINT, -}; - -LLVMValueRef -ac_build_opencoded_load_format(struct ac_llvm_context *ctx, - unsigned log_size, - unsigned num_channels, - unsigned format, - bool reverse, - bool known_aligned, - LLVMValueRef rsrc, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned cache_policy, - bool can_speculate); - -void -ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned cache_policy); - -void -ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned cache_policy); - -void -ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy); - -void -ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy); +enum +{ + AC_FETCH_FORMAT_FLOAT = 0, + 
AC_FETCH_FORMAT_FIXED, + AC_FETCH_FORMAT_UNORM, + AC_FETCH_FORMAT_SNORM, + AC_FETCH_FORMAT_USCALED, + AC_FETCH_FORMAT_SSCALED, + AC_FETCH_FORMAT_UINT, + AC_FETCH_FORMAT_SINT, +}; + +LLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigned log_size, + unsigned num_channels, unsigned format, bool reverse, + bool known_aligned, LLVMValueRef rsrc, + LLVMValueRef vindex, LLVMValueRef voffset, + LLVMValueRef soffset, unsigned cache_policy, + bool can_speculate); + +void ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vdata, LLVMValueRef voffset, LLVMValueRef soffset, + unsigned cache_policy); + +void ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, + LLVMValueRef voffset, LLVMValueRef soffset, unsigned cache_policy); + +void ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset, + LLVMValueRef soffset, LLVMValueRef immoffset, + unsigned num_channels, unsigned dfmt, unsigned nfmt, + unsigned cache_policy); + +void ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, + LLVMValueRef voffset, LLVMValueRef soffset, LLVMValueRef immoffset, + unsigned num_channels, unsigned dfmt, unsigned nfmt, + unsigned cache_policy); -LLVMValueRef -ac_get_thread_id(struct ac_llvm_context *ctx); +LLVMValueRef ac_get_thread_id(struct ac_llvm_context *ctx); #define AC_TID_MASK_TOP_LEFT 0xfffffffc #define AC_TID_MASK_TOP 0xfffffffd #define AC_TID_MASK_LEFT 0xfffffffe -LLVMValueRef -ac_build_ddxy(struct ac_llvm_context *ctx, - uint32_t mask, - int idx, - LLVMValueRef val); +LLVMValueRef ac_build_ddxy(struct ac_llvm_context *ctx, uint32_t mask, int idx, LLVMValueRef val); -#define AC_SENDMSG_GS 2 -#define AC_SENDMSG_GS_DONE 3 +#define AC_SENDMSG_GS 2 +#define AC_SENDMSG_GS_DONE 3 #define AC_SENDMSG_GS_ALLOC_REQ 9 #define AC_SENDMSG_GS_OP_NOP (0 << 4) @@ 
-474,162 +339,137 @@ #define AC_SENDMSG_GS_OP_EMIT (2 << 4) #define AC_SENDMSG_GS_OP_EMIT_CUT (3 << 4) -void ac_build_sendmsg(struct ac_llvm_context *ctx, - uint32_t msg, - LLVMValueRef wave_id); - -LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx, - LLVMValueRef arg, - LLVMTypeRef dst_type); - -LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx, - LLVMValueRef arg, - LLVMTypeRef dst_type); -LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, - LLVMValueRef b); -LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, - LLVMValueRef b); -LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, - LLVMValueRef b); -LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, - LLVMValueRef b); +void ac_build_sendmsg(struct ac_llvm_context *ctx, uint32_t msg, LLVMValueRef wave_id); + +LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type); + +LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type); +LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b); LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b); LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b); LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value); struct ac_export_args { - LLVMValueRef out[4]; - unsigned target; - unsigned enabled_channels; - bool compr; - bool done; - bool valid_mask; + LLVMValueRef out[4]; + unsigned target; + unsigned enabled_channels; + bool compr; + bool done; + bool valid_mask; }; void ac_build_export(struct ac_llvm_context *ctx, struct 
ac_export_args *a); void ac_build_export_null(struct ac_llvm_context *ctx); -enum ac_image_opcode { - ac_image_sample, - ac_image_gather4, - ac_image_load, - ac_image_load_mip, - ac_image_store, - ac_image_store_mip, - ac_image_get_lod, - ac_image_get_resinfo, - ac_image_atomic, - ac_image_atomic_cmpswap, -}; - -enum ac_atomic_op { - ac_atomic_swap, - ac_atomic_add, - ac_atomic_sub, - ac_atomic_smin, - ac_atomic_umin, - ac_atomic_smax, - ac_atomic_umax, - ac_atomic_and, - ac_atomic_or, - ac_atomic_xor, - ac_atomic_inc_wrap, - ac_atomic_dec_wrap, +enum ac_image_opcode +{ + ac_image_sample, + ac_image_gather4, + ac_image_load, + ac_image_load_mip, + ac_image_store, + ac_image_store_mip, + ac_image_get_lod, + ac_image_get_resinfo, + ac_image_atomic, + ac_image_atomic_cmpswap, +}; + +enum ac_atomic_op +{ + ac_atomic_swap, + ac_atomic_add, + ac_atomic_sub, + ac_atomic_smin, + ac_atomic_umin, + ac_atomic_smax, + ac_atomic_umax, + ac_atomic_and, + ac_atomic_or, + ac_atomic_xor, + ac_atomic_inc_wrap, + ac_atomic_dec_wrap, }; /* These cache policy bits match the definitions used by the LLVM intrinsics. 
*/ -enum ac_image_cache_policy { - ac_glc = 1 << 0, /* per-CU cache control */ - ac_slc = 1 << 1, /* global L2 cache control */ - ac_dlc = 1 << 2, /* per-shader-array cache control */ - ac_swizzled = 1 << 3, /* the access is swizzled, disabling load/store merging */ +enum ac_image_cache_policy +{ + ac_glc = 1 << 0, /* per-CU cache control */ + ac_slc = 1 << 1, /* global L2 cache control */ + ac_dlc = 1 << 2, /* per-shader-array cache control */ + ac_swizzled = 1 << 3, /* the access is swizzled, disabling load/store merging */ }; struct ac_image_args { - enum ac_image_opcode opcode : 4; - enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */ - enum ac_image_dim dim : 3; - unsigned dmask : 4; - unsigned cache_policy : 3; - bool unorm : 1; - bool level_zero : 1; - bool d16 : 1; /* data and return values are 16-bit, requires GFX8+ */ - unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */ - - LLVMValueRef resource; - LLVMValueRef sampler; - LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */ - LLVMValueRef offset; - LLVMValueRef bias; - LLVMValueRef compare; - LLVMValueRef derivs[6]; - LLVMValueRef coords[4]; - LLVMValueRef lod; // also used by ac_image_get_resinfo - LLVMValueRef min_lod; -}; - -LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, - struct ac_image_args *a); -LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx, - LLVMValueRef rsrc); -LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, - LLVMValueRef args[2]); -LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, - LLVMValueRef args[2]); -LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, - LLVMValueRef args[2]); -LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, - LLVMValueRef args[2], unsigned bits, bool hi); -LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, - LLVMValueRef args[2], unsigned bits, bool hi); + enum ac_image_opcode opcode : 4; 
+ enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */ + enum ac_image_dim dim : 3; + unsigned dmask : 4; + unsigned cache_policy : 3; + bool unorm : 1; + bool level_zero : 1; + bool d16 : 1; /* data and return values are 16-bit, requires GFX8+ */ + unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */ + + LLVMValueRef resource; + LLVMValueRef sampler; + LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */ + LLVMValueRef offset; + LLVMValueRef bias; + LLVMValueRef compare; + LLVMValueRef derivs[6]; + LLVMValueRef coords[4]; + LLVMValueRef lod; // also used by ac_image_get_resinfo + LLVMValueRef min_lod; +}; + +LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a); +LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx, LLVMValueRef rsrc); +LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); +LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); +LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); +LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, + bool hi); +LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, + bool hi); LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1); void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1); -LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, - LLVMValueRef offset, LLVMValueRef width, - bool is_signed); -LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, - LLVMValueRef s1, LLVMValueRef s2); -LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, - LLVMValueRef s1, LLVMValueRef s2); +LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset, + LLVMValueRef width, bool 
is_signed); +LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1, + LLVMValueRef s2); +LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1, + LLVMValueRef s2); void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags); -LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize); +LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize); -LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0, - LLVMValueRef src1, LLVMValueRef src2, - unsigned bitsize); +LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1, + LLVMValueRef src2, unsigned bitsize); -LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize); +LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize); -LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize); +LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize); LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0); -LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, - LLVMValueRef src0); +LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0); -void ac_optimize_vs_outputs(struct ac_llvm_context *ac, - LLVMValueRef main_fn, - uint8_t *vs_output_param_offset, - uint32_t num_outputs, - uint32_t skip_output_mask, - uint8_t *num_param_exports); +void ac_optimize_vs_outputs(struct ac_llvm_context *ac, LLVMValueRef main_fn, + uint8_t *vs_output_param_offset, uint32_t num_outputs, + uint32_t skip_output_mask, uint8_t *num_param_exports); void ac_init_exec_full_mask(struct ac_llvm_context *ctx); void ac_declare_lds_as_pointer(struct ac_llvm_context *ac); -LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, - 
LLVMValueRef dw_addr); -void ac_lds_store(struct ac_llvm_context *ctx, - LLVMValueRef dw_addr, LLVMValueRef value); - -LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, - LLVMTypeRef dst_type, - LLVMValueRef src0); +LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, LLVMValueRef dw_addr); +void ac_lds_store(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value); + +LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0); LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type); LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type); @@ -641,178 +481,141 @@ void ac_build_endif(struct ac_llvm_context *ctx, int lable_id); void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id); void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id); -void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value, - int lable_id); -void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, - int lable_id); +void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value, int lable_id); +void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, int lable_id); -LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type, - const char *name); -LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, - const char *name); +LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name); +LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name); -LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr, - LLVMTypeRef type); +LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr, LLVMTypeRef type); -LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, - unsigned count); +LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count); -LLVMValueRef ac_unpack_param(struct 
ac_llvm_context *ctx, LLVMValueRef param, - unsigned rshift, unsigned bitwidth); +LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift, + unsigned bitwidth); -void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, - LLVMValueRef *addr, bool is_array_tex); +void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr, + bool is_array_tex); -LLVMValueRef -ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask); +LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask); -LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, - LLVMValueRef src, LLVMValueRef lane); +LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, LLVMValueRef src, + LLVMValueRef lane); -LLVMValueRef -ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane); +LLVMValueRef ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane); -LLVMValueRef -ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane); +LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, + LLVMValueRef lane); -LLVMValueRef -ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask); +LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask); -LLVMValueRef -ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op); +LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op); -LLVMValueRef -ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op); +LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op); -LLVMValueRef -ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size); +LLVMValueRef 
ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, + unsigned cluster_size); /** * Common arguments for a scan/reduce operation that accumulates per-wave * values across an entire workgroup, while respecting the order of waves. */ struct ac_wg_scan { - bool enable_reduce; - bool enable_exclusive; - bool enable_inclusive; - nir_op op; - LLVMValueRef src; /* clobbered! */ - LLVMValueRef result_reduce; - LLVMValueRef result_exclusive; - LLVMValueRef result_inclusive; - LLVMValueRef extra; - LLVMValueRef waveidx; - LLVMValueRef numwaves; /* only needed for "reduce" operations */ - - /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */ - LLVMValueRef scratch; - unsigned maxwaves; -}; - -void -ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); -void -ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); -void -ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); - -void -ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); -void -ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); -void -ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); - -LLVMValueRef -ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, - unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3); - -LLVMValueRef -ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index); - -LLVMValueRef -ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize); - -LLVMValueRef -ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize); - -LLVMValueRef -ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0, - unsigned bitsize); - -LLVMValueRef -ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij); + bool enable_reduce; + bool enable_exclusive; + bool enable_inclusive; + nir_op op; + 
LLVMValueRef src; /* clobbered! */ + LLVMValueRef result_reduce; + LLVMValueRef result_exclusive; + LLVMValueRef result_inclusive; + LLVMValueRef extra; + LLVMValueRef waveidx; + LLVMValueRef numwaves; /* only needed for "reduce" operations */ -LLVMValueRef -ac_build_load_helper_invocation(struct ac_llvm_context *ctx); + /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */ + LLVMValueRef scratch; + unsigned maxwaves; +}; + +void ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); + +void ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); + +LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0, + unsigned lane1, unsigned lane2, unsigned lane3); + +LLVMValueRef ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index); + +LLVMValueRef ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize); -LLVMValueRef -ac_build_is_helper_invocation(struct ac_llvm_context *ctx); +LLVMValueRef ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize); -LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, - LLVMValueRef *args, unsigned num_args); +LLVMValueRef ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0, + unsigned bitsize); + +LLVMValueRef ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij); + +LLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx); + +LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx); + +LLVMValueRef ac_build_call(struct 
ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args, + unsigned num_args); LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op, - LLVMValueRef ptr, LLVMValueRef val, - const char *sync_scope); + LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope); LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr, - LLVMValueRef cmp, LLVMValueRef val, - const char *sync_scope); + LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope); -void -ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, - LLVMValueRef stencil, LLVMValueRef samplemask, - struct ac_export_args *args); +void ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueRef stencil, + LLVMValueRef samplemask, struct ac_export_args *args); void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id, - LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt); + LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt); struct ac_ngg_prim { - unsigned num_vertices; - LLVMValueRef isnull; - LLVMValueRef index[3]; - LLVMValueRef edgeflag[3]; - LLVMValueRef passthrough; + unsigned num_vertices; + LLVMValueRef isnull; + LLVMValueRef index[3]; + LLVMValueRef edgeflag[3]; + LLVMValueRef passthrough; }; -LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, - const struct ac_ngg_prim *prim); -void ac_build_export_prim(struct ac_llvm_context *ctx, - const struct ac_ngg_prim *prim); +LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim); +void ac_build_export_prim(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim); -static inline LLVMValueRef -ac_get_arg(struct ac_llvm_context *ctx, struct ac_arg arg) +static inline LLVMValueRef ac_get_arg(struct ac_llvm_context *ctx, struct ac_arg arg) { - assert(arg.used); - return LLVMGetParam(ctx->main_function, arg.arg_index); + assert(arg.used); + return LLVMGetParam(ctx->main_function, arg.arg_index); } -enum 
ac_llvm_calling_convention { - AC_LLVM_AMDGPU_VS = 87, - AC_LLVM_AMDGPU_GS = 88, - AC_LLVM_AMDGPU_PS = 89, - AC_LLVM_AMDGPU_CS = 90, - AC_LLVM_AMDGPU_HS = 93, +enum ac_llvm_calling_convention +{ + AC_LLVM_AMDGPU_VS = 87, + AC_LLVM_AMDGPU_GS = 88, + AC_LLVM_AMDGPU_PS = 89, + AC_LLVM_AMDGPU_CS = 90, + AC_LLVM_AMDGPU_HS = 93, }; -LLVMValueRef ac_build_main(const struct ac_shader_args *args, - struct ac_llvm_context *ctx, - enum ac_llvm_calling_convention convention, - const char *name, LLVMTypeRef ret_type, - LLVMModuleRef module); +LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx, + enum ac_llvm_calling_convention convention, const char *name, + LLVMTypeRef ret_type, LLVMModuleRef module); void ac_build_s_endpgm(struct ac_llvm_context *ctx); -LLVMValueRef ac_prefix_bitcount(struct ac_llvm_context *ctx, - LLVMValueRef mask, LLVMValueRef index); -LLVMValueRef ac_prefix_bitcount_2x64(struct ac_llvm_context *ctx, - LLVMValueRef mask[2], LLVMValueRef index); -void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, - LLVMValueRef is_odd, - LLVMValueRef flatshade_first, - LLVMValueRef index[3]); +LLVMValueRef ac_prefix_bitcount(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef index); +LLVMValueRef ac_prefix_bitcount_2x64(struct ac_llvm_context *ctx, LLVMValueRef mask[2], + LLVMValueRef index); +void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LLVMValueRef is_odd, + LLVMValueRef flatshade_first, + LLVMValueRef index[3]); #ifdef __cplusplus } diff -Nru mesa-20.2.1/src/amd/llvm/ac_llvm_cull.c mesa-20.2.6/src/amd/llvm/ac_llvm_cull.c --- mesa-20.2.1/src/amd/llvm/ac_llvm_cull.c 2020-10-14 17:19:10.293182800 +0000 +++ mesa-20.2.6/src/amd/llvm/ac_llvm_cull.c 2020-12-16 21:42:03.543110000 +0000 @@ -24,205 +24,188 @@ */ #include "ac_llvm_cull.h" + #include struct ac_position_w_info { - /* If a primitive intersects the W=0 plane, it causes a reflection - * of the determinant used 
for face culling. Every vertex behind - * the W=0 plane negates the determinant, so having 2 vertices behind - * the plane has no effect. This is i1 true if the determinant should be - * negated. - */ - LLVMValueRef w_reflection; - - /* If we simplify the "-w <= p <= w" view culling equation, we get - * "-w <= w", which can't be satisfied when w is negative. - * In perspective projection, a negative W means that the primitive - * is behind the viewer, but the equation is independent of the type - * of projection. - * - * w_accepted is false when all W are negative and therefore - * the primitive is invisible. - */ - LLVMValueRef w_accepted; + /* If a primitive intersects the W=0 plane, it causes a reflection + * of the determinant used for face culling. Every vertex behind + * the W=0 plane negates the determinant, so having 2 vertices behind + * the plane has no effect. This is i1 true if the determinant should be + * negated. + */ + LLVMValueRef w_reflection; + + /* If we simplify the "-w <= p <= w" view culling equation, we get + * "-w <= w", which can't be satisfied when w is negative. + * In perspective projection, a negative W means that the primitive + * is behind the viewer, but the equation is independent of the type + * of projection. + * + * w_accepted is false when all W are negative and therefore + * the primitive is invisible. 
+ */ + LLVMValueRef w_accepted; - LLVMValueRef all_w_positive; - LLVMValueRef any_w_negative; + LLVMValueRef all_w_positive; + LLVMValueRef any_w_negative; }; -static void ac_analyze_position_w(struct ac_llvm_context *ctx, - LLVMValueRef pos[3][4], - struct ac_position_w_info *w) +static void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], + struct ac_position_w_info *w) { - LLVMBuilderRef builder = ctx->builder; - LLVMValueRef all_w_negative = ctx->i1true; + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef all_w_negative = ctx->i1true; - w->w_reflection = ctx->i1false; - w->any_w_negative = ctx->i1false; + w->w_reflection = ctx->i1false; + w->any_w_negative = ctx->i1false; - for (unsigned i = 0; i < 3; i++) { - LLVMValueRef neg_w; + for (unsigned i = 0; i < 3; i++) { + LLVMValueRef neg_w; - neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, ""); - /* If neg_w is true, negate w_reflection. */ - w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, ""); - w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, ""); - all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, ""); - } - w->all_w_positive = LLVMBuildNot(builder, w->any_w_negative, ""); - w->w_accepted = LLVMBuildNot(builder, all_w_negative, ""); + neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, ""); + /* If neg_w is true, negate w_reflection. */ + w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, ""); + w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, ""); + all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, ""); + } + w->all_w_positive = LLVMBuildNot(builder, w->any_w_negative, ""); + w->w_accepted = LLVMBuildNot(builder, all_w_negative, ""); } /* Perform front/back face culling and return true if the primitive is accepted. 
*/ -static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, - LLVMValueRef pos[3][4], - struct ac_position_w_info *w, - bool cull_front, - bool cull_back, - bool cull_zero_area) +static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], + struct ac_position_w_info *w, bool cull_front, bool cull_back, + bool cull_zero_area) { - LLVMBuilderRef builder = ctx->builder; + LLVMBuilderRef builder = ctx->builder; - if (cull_front && cull_back) - return ctx->i1false; + if (cull_front && cull_back) + return ctx->i1false; - if (!cull_front && !cull_back && !cull_zero_area) - return ctx->i1true; + if (!cull_front && !cull_back && !cull_zero_area) + return ctx->i1true; - /* Front/back face culling. Also if the determinant == 0, the triangle - * area is 0. - */ - LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], ""); - LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], ""); - LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], ""); - LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], ""); - LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, ""); - LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, ""); - LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, ""); - - /* Negative W negates the determinant. */ - det = LLVMBuildSelect(builder, w->w_reflection, - LLVMBuildFNeg(builder, det, ""), - det, ""); - - LLVMValueRef accepted = NULL; - if (cull_front) { - LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE; - accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); - } else if (cull_back) { - LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE; - accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); - } else if (cull_zero_area) { - accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, ""); - } - return accepted; + /* Front/back face culling. Also if the determinant == 0, the triangle + * area is 0. 
+ */ + LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], ""); + LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], ""); + LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], ""); + LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], ""); + LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, ""); + LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, ""); + LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, ""); + + /* Negative W negates the determinant. */ + det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, ""); + + LLVMValueRef accepted = NULL; + if (cull_front) { + LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE; + accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); + } else if (cull_back) { + LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE; + accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); + } else if (cull_zero_area) { + accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, ""); + } + return accepted; } /* Perform view culling and small primitive elimination and return true * if the primitive is accepted and initially_accepted == true. 
*/ -static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, - LLVMValueRef pos[3][4], - LLVMValueRef initially_accepted, - struct ac_position_w_info *w, - LLVMValueRef vp_scale[2], - LLVMValueRef vp_translate[2], - LLVMValueRef small_prim_precision, - bool cull_view_xy, - bool cull_view_near_z, - bool cull_view_far_z, - bool cull_small_prims, - bool use_halfz_clip_space) +static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], + LLVMValueRef initially_accepted, struct ac_position_w_info *w, + LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2], + LLVMValueRef small_prim_precision, bool cull_view_xy, + bool cull_view_near_z, bool cull_view_far_z, bool cull_small_prims, + bool use_halfz_clip_space) { - LLVMBuilderRef builder = ctx->builder; + LLVMBuilderRef builder = ctx->builder; - if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims) - return initially_accepted; + if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims) + return initially_accepted; - /* Skip the culling if the primitive has already been rejected or - * if any W is negative. The bounding box culling doesn't work when - * W is negative. - */ - LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted, - w->all_w_positive, ""); - LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, ""); - LLVMBuildStore(builder, initially_accepted, accepted_var); - - ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */); - { - LLVMValueRef bbox_min[3], bbox_max[3]; - LLVMValueRef accepted = initially_accepted; - - /* Compute the primitive bounding box for easy culling. */ - for (unsigned chan = 0; chan < (cull_view_near_z || cull_view_far_z ? 
3 : 2); chan++) { - bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]); - bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]); - - bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]); - bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]); - } - - /* View culling. */ - if (cull_view_xy || cull_view_near_z || cull_view_far_z) { - for (unsigned chan = 0; chan < 3; chan++) { - LLVMValueRef visible; - - if ((cull_view_xy && chan <= 1) || - (cull_view_near_z && chan == 2)) { - float t = chan == 2 && use_halfz_clip_space ? 0 : -1; - visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan], - LLVMConstReal(ctx->f32, t), ""); - accepted = LLVMBuildAnd(builder, accepted, visible, ""); - } - - if ((cull_view_xy && chan <= 1) || - (cull_view_far_z && chan == 2)) { - visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], - ctx->f32_1, ""); - accepted = LLVMBuildAnd(builder, accepted, visible, ""); - } - } - } - - /* Small primitive elimination. */ - if (cull_small_prims) { - /* Assuming a sample position at (0.5, 0.5), if we round - * the bounding box min/max extents and the results of - * the rounding are equal in either the X or Y direction, - * the bounding box does not intersect the sample. - * - * See these GDC slides for pictures: - * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf - */ - LLVMValueRef min, max, not_equal[2], visible; - - for (unsigned chan = 0; chan < 2; chan++) { - /* Convert the position to screen-space coordinates. */ - min = ac_build_fmad(ctx, bbox_min[chan], - vp_scale[chan], vp_translate[chan]); - max = ac_build_fmad(ctx, bbox_max[chan], - vp_scale[chan], vp_translate[chan]); - /* Scale the bounding box according to the precision of - * the rasterizer and the number of MSAA samples. 
*/ - min = LLVMBuildFSub(builder, min, small_prim_precision, ""); - max = LLVMBuildFAdd(builder, max, small_prim_precision, ""); - - /* Determine if the bbox intersects the sample point. - * It also works for MSAA, but vp_scale, vp_translate, - * and small_prim_precision are computed differently. - */ - min = ac_build_round(ctx, min); - max = ac_build_round(ctx, max); - not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, ""); - } - visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], ""); - accepted = LLVMBuildAnd(builder, accepted, visible, ""); - } - - LLVMBuildStore(builder, accepted, accepted_var); - } - ac_build_endif(ctx, 10000000); + /* Skip the culling if the primitive has already been rejected or + * if any W is negative. The bounding box culling doesn't work when + * W is negative. + */ + LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted, w->all_w_positive, ""); + LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, ""); + LLVMBuildStore(builder, initially_accepted, accepted_var); + + ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */); + { + LLVMValueRef bbox_min[3], bbox_max[3]; + LLVMValueRef accepted = initially_accepted; + + /* Compute the primitive bounding box for easy culling. */ + for (unsigned chan = 0; chan < (cull_view_near_z || cull_view_far_z ? 3 : 2); chan++) { + bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]); + bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]); + + bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]); + bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]); + } + + /* View culling. */ + if (cull_view_xy || cull_view_near_z || cull_view_far_z) { + for (unsigned chan = 0; chan < 3; chan++) { + LLVMValueRef visible; + + if ((cull_view_xy && chan <= 1) || (cull_view_near_z && chan == 2)) { + float t = chan == 2 && use_halfz_clip_space ? 
0 : -1; + visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan], + LLVMConstReal(ctx->f32, t), ""); + accepted = LLVMBuildAnd(builder, accepted, visible, ""); + } + + if ((cull_view_xy && chan <= 1) || (cull_view_far_z && chan == 2)) { + visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], ctx->f32_1, ""); + accepted = LLVMBuildAnd(builder, accepted, visible, ""); + } + } + } + + /* Small primitive elimination. */ + if (cull_small_prims) { + /* Assuming a sample position at (0.5, 0.5), if we round + * the bounding box min/max extents and the results of + * the rounding are equal in either the X or Y direction, + * the bounding box does not intersect the sample. + * + * See these GDC slides for pictures: + * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf + */ + LLVMValueRef min, max, not_equal[2], visible; + + for (unsigned chan = 0; chan < 2; chan++) { + /* Convert the position to screen-space coordinates. */ + min = ac_build_fmad(ctx, bbox_min[chan], vp_scale[chan], vp_translate[chan]); + max = ac_build_fmad(ctx, bbox_max[chan], vp_scale[chan], vp_translate[chan]); + /* Scale the bounding box according to the precision of + * the rasterizer and the number of MSAA samples. */ + min = LLVMBuildFSub(builder, min, small_prim_precision, ""); + max = LLVMBuildFAdd(builder, max, small_prim_precision, ""); + + /* Determine if the bbox intersects the sample point. + * It also works for MSAA, but vp_scale, vp_translate, + * and small_prim_precision are computed differently. 
+ */ + min = ac_build_round(ctx, min); + max = ac_build_round(ctx, max); + not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, ""); + } + visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], ""); + accepted = LLVMBuildAnd(builder, accepted, visible, ""); + } + + LLVMBuildStore(builder, accepted, accepted_var); + } + ac_build_endif(ctx, 10000000); - return LLVMBuildLoad(builder, accepted_var, ""); + return LLVMBuildLoad(builder, accepted_var, ""); } /** @@ -241,35 +224,27 @@ * subpixel_bits are defined by the quantization mode. * \param options See ac_cull_options. */ -LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, - LLVMValueRef pos[3][4], - LLVMValueRef initially_accepted, - LLVMValueRef vp_scale[2], - LLVMValueRef vp_translate[2], - LLVMValueRef small_prim_precision, - struct ac_cull_options *options) +LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], + LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2], + LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, + struct ac_cull_options *options) { - struct ac_position_w_info w; - ac_analyze_position_w(ctx, pos, &w); + struct ac_position_w_info w; + ac_analyze_position_w(ctx, pos, &w); - /* W culling. */ - LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true; - accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, ""); - - /* Face culling. */ - accepted = LLVMBuildAnd(ctx->builder, accepted, - ac_cull_face(ctx, pos, &w, - options->cull_front, - options->cull_back, - options->cull_zero_area), ""); - - /* View culling and small primitive elimination. */ - accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, - small_prim_precision, - options->cull_view_xy, - options->cull_view_near_z, - options->cull_view_far_z, - options->cull_small_prims, - options->use_halfz_clip_space); - return accepted; + /* W culling. */ + LLVMValueRef accepted = options->cull_w ? 
w.w_accepted : ctx->i1true; + accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, ""); + + /* Face culling. */ + accepted = LLVMBuildAnd( + ctx->builder, accepted, + ac_cull_face(ctx, pos, &w, options->cull_front, options->cull_back, options->cull_zero_area), + ""); + + /* View culling and small primitive elimination. */ + accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision, + options->cull_view_xy, options->cull_view_near_z, options->cull_view_far_z, + options->cull_small_prims, options->use_halfz_clip_space); + return accepted; } diff -Nru mesa-20.2.1/src/amd/llvm/ac_llvm_cull.h mesa-20.2.6/src/amd/llvm/ac_llvm_cull.h --- mesa-20.2.1/src/amd/llvm/ac_llvm_cull.h 2020-10-14 17:19:10.293182800 +0000 +++ mesa-20.2.6/src/amd/llvm/ac_llvm_cull.h 2020-12-16 21:42:03.543110000 +0000 @@ -29,31 +29,28 @@ #include "ac_llvm_build.h" struct ac_cull_options { - /* In general, I recommend setting all to true except view Z culling, - * which isn't so effective because W culling is cheaper and partially - * replaces near Z culling, and you don't need to set Position.z - * if Z culling is disabled. - * - * If something doesn't work, turn some of these off to find out what. - */ - bool cull_front; - bool cull_back; - bool cull_view_xy; - bool cull_view_near_z; - bool cull_view_far_z; - bool cull_small_prims; - bool cull_zero_area; - bool cull_w; /* cull primitives with all W < 0 */ + /* In general, I recommend setting all to true except view Z culling, + * which isn't so effective because W culling is cheaper and partially + * replaces near Z culling, and you don't need to set Position.z + * if Z culling is disabled. + * + * If something doesn't work, turn some of these off to find out what. 
+ */ + bool cull_front; + bool cull_back; + bool cull_view_xy; + bool cull_view_near_z; + bool cull_view_far_z; + bool cull_small_prims; + bool cull_zero_area; + bool cull_w; /* cull primitives with all W < 0 */ - bool use_halfz_clip_space; + bool use_halfz_clip_space; }; -LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, - LLVMValueRef pos[3][4], - LLVMValueRef initially_accepted, - LLVMValueRef vp_scale[2], - LLVMValueRef vp_translate[2], - LLVMValueRef small_prim_precision, - struct ac_cull_options *options); +LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], + LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2], + LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, + struct ac_cull_options *options); #endif diff -Nru mesa-20.2.1/src/amd/llvm/ac_llvm_util.c mesa-20.2.6/src/amd/llvm/ac_llvm_util.c --- mesa-20.2.1/src/amd/llvm/ac_llvm_util.c 2020-10-14 17:19:10.293182800 +0000 +++ mesa-20.2.6/src/amd/llvm/ac_llvm_util.c 2020-12-16 21:42:03.544110000 +0000 @@ -24,16 +24,17 @@ */ /* based on pieces from si_pipe.c and radeon_llvm_emit.c */ #include "ac_llvm_util.h" + #include "ac_llvm_build.h" +#include "c11/threads.h" +#include "gallivm/lp_bld_misc.h" #include "util/bitscan.h" +#include "util/u_math.h" #include #include #include #include #include -#include "c11/threads.h" -#include "gallivm/lp_bld_misc.h" -#include "util/u_math.h" #include #include @@ -41,239 +42,240 @@ static void ac_init_llvm_target() { - LLVMInitializeAMDGPUTargetInfo(); - LLVMInitializeAMDGPUTarget(); - LLVMInitializeAMDGPUTargetMC(); - LLVMInitializeAMDGPUAsmPrinter(); - - /* For inline assembly. */ - LLVMInitializeAMDGPUAsmParser(); - - /* For ACO disassembly. */ - LLVMInitializeAMDGPUDisassembler(); - - /* Workaround for bug in llvm 4.0 that causes image intrinsics - * to disappear. - * https://reviews.llvm.org/D26348 - * - * "mesa" is the prefix for error messages. 
- * - * -global-isel-abort=2 is a no-op unless global isel has been enabled. - * This option tells the backend to fall-back to SelectionDAG and print - * a diagnostic message if global isel fails. - */ - const char *argv[] = { - "mesa", - "-simplifycfg-sink-common=false", - "-global-isel-abort=2", + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTarget(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUAsmPrinter(); + + /* For inline assembly. */ + LLVMInitializeAMDGPUAsmParser(); + + /* For ACO disassembly. */ + LLVMInitializeAMDGPUDisassembler(); + + /* Workaround for bug in llvm 4.0 that causes image intrinsics + * to disappear. + * https://reviews.llvm.org/D26348 + * + * "mesa" is the prefix for error messages. + * + * -global-isel-abort=2 is a no-op unless global isel has been enabled. + * This option tells the backend to fall-back to SelectionDAG and print + * a diagnostic message if global isel fails. + */ + const char *argv[] = { + "mesa", + "-simplifycfg-sink-common=false", + "-global-isel-abort=2", #if LLVM_VERSION_MAJOR >= 10 - /* Atomic optimizations require LLVM 10.0 for gfx10 support. */ - "-amdgpu-atomic-optimizations=true", + /* Atomic optimizations require LLVM 10.0 for gfx10 support. 
*/ + "-amdgpu-atomic-optimizations=true", #endif #if LLVM_VERSION_MAJOR >= 11 - /* This was disabled by default in: https://reviews.llvm.org/D77228 */ - "-structurizecfg-skip-uniform-regions", + /* This was disabled by default in: https://reviews.llvm.org/D77228 */ + "-structurizecfg-skip-uniform-regions", #endif - }; - LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL); + }; + LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL); } PUBLIC void ac_init_shared_llvm_once(void) { - static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT; - call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target); + static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT; + call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target); } #if !LLVM_IS_SHARED static once_flag ac_init_static_llvm_target_once_flag = ONCE_FLAG_INIT; static void ac_init_static_llvm_once(void) { - call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target); + call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target); } #endif void ac_init_llvm_once(void) { #if LLVM_IS_SHARED - ac_init_shared_llvm_once(); + ac_init_shared_llvm_once(); #else - ac_init_static_llvm_once(); + ac_init_static_llvm_once(); #endif } static LLVMTargetRef ac_get_llvm_target(const char *triple) { - LLVMTargetRef target = NULL; - char *err_message = NULL; + LLVMTargetRef target = NULL; + char *err_message = NULL; - if (LLVMGetTargetFromTriple(triple, &target, &err_message)) { - fprintf(stderr, "Cannot find target for triple %s ", triple); - if (err_message) { - fprintf(stderr, "%s\n", err_message); - } - LLVMDisposeMessage(err_message); - return NULL; - } - return target; + if (LLVMGetTargetFromTriple(triple, &target, &err_message)) { + fprintf(stderr, "Cannot find target for triple %s ", triple); + if (err_message) { + fprintf(stderr, "%s\n", err_message); + } + LLVMDisposeMessage(err_message); + return NULL; + } + return target; } const char *ac_get_llvm_processor_name(enum 
radeon_family family) { - switch (family) { - case CHIP_TAHITI: - return "tahiti"; - case CHIP_PITCAIRN: - return "pitcairn"; - case CHIP_VERDE: - return "verde"; - case CHIP_OLAND: - return "oland"; - case CHIP_HAINAN: - return "hainan"; - case CHIP_BONAIRE: - return "bonaire"; - case CHIP_KABINI: - return "kabini"; - case CHIP_KAVERI: - return "kaveri"; - case CHIP_HAWAII: - return "hawaii"; - case CHIP_TONGA: - return "tonga"; - case CHIP_ICELAND: - return "iceland"; - case CHIP_CARRIZO: - return "carrizo"; - case CHIP_FIJI: - return "fiji"; - case CHIP_STONEY: - return "stoney"; - case CHIP_POLARIS10: - return "polaris10"; - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - return "polaris11"; - case CHIP_VEGA10: - return "gfx900"; - case CHIP_RAVEN: - return "gfx902"; - case CHIP_VEGA12: - return "gfx904"; - case CHIP_VEGA20: - return "gfx906"; - case CHIP_RAVEN2: - case CHIP_RENOIR: - return "gfx909"; - case CHIP_ARCTURUS: - return "gfx908"; - case CHIP_NAVI10: - return "gfx1010"; - case CHIP_NAVI12: - return "gfx1011"; - case CHIP_NAVI14: - return "gfx1012"; - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - return "gfx1030"; - default: - return ""; - } + switch (family) { + case CHIP_TAHITI: + return "tahiti"; + case CHIP_PITCAIRN: + return "pitcairn"; + case CHIP_VERDE: + return "verde"; + case CHIP_OLAND: + return "oland"; + case CHIP_HAINAN: + return "hainan"; + case CHIP_BONAIRE: + return "bonaire"; + case CHIP_KABINI: + return "kabini"; + case CHIP_KAVERI: + return "kaveri"; + case CHIP_HAWAII: + return "hawaii"; + case CHIP_TONGA: + return "tonga"; + case CHIP_ICELAND: + return "iceland"; + case CHIP_CARRIZO: + return "carrizo"; + case CHIP_FIJI: + return "fiji"; + case CHIP_STONEY: + return "stoney"; + case CHIP_POLARIS10: + return "polaris10"; + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_VEGAM: + return "polaris11"; + case CHIP_VEGA10: + return "gfx900"; + case CHIP_RAVEN: + return "gfx902"; + case CHIP_VEGA12: + 
return "gfx904"; + case CHIP_VEGA20: + return "gfx906"; + case CHIP_RAVEN2: + case CHIP_RENOIR: + return "gfx909"; + case CHIP_ARCTURUS: + return "gfx908"; + case CHIP_NAVI10: + return "gfx1010"; + case CHIP_NAVI12: + return "gfx1011"; + case CHIP_NAVI14: + return "gfx1012"; + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + return "gfx1030"; + default: + return ""; + } } static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, - enum ac_target_machine_options tm_options, - LLVMCodeGenOptLevel level, - const char **out_triple) -{ - assert(family >= CHIP_TAHITI); - char features[256]; - const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--"; - LLVMTargetRef target = ac_get_llvm_target(triple); - - snprintf(features, sizeof(features), - "+DumpCode%s%s%s%s%s", - LLVM_VERSION_MAJOR >= 11 ? "" : ",-fp32-denormals,+fp64-denormals", - family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) ? - ",+wavefrontsize64,-wavefrontsize32" : "", - family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "", - family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "", - tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : ""); - - LLVMTargetMachineRef tm = LLVMCreateTargetMachine( - target, - triple, - ac_get_llvm_processor_name(family), - features, - level, - LLVMRelocDefault, - LLVMCodeModelDefault); - - if (out_triple) - *out_triple = triple; - if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL) - ac_enable_global_isel(tm); - return tm; + enum ac_target_machine_options tm_options, + LLVMCodeGenOptLevel level, + const char **out_triple) +{ + assert(family >= CHIP_TAHITI); + char features[256]; + const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--"; + LLVMTargetRef target = ac_get_llvm_target(triple); + + snprintf(features, sizeof(features), "+DumpCode%s%s%s%s%s", + LLVM_VERSION_MAJOR >= 11 ? 
"" : ",-fp32-denormals,+fp64-denormals", + family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) + ? ",+wavefrontsize64,-wavefrontsize32" + : "", + family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "", + family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "", + tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : ""); + + LLVMTargetMachineRef tm = + LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), features, level, + LLVMRelocDefault, LLVMCodeModelDefault); + + if (out_triple) + *out_triple = triple; + if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL) + ac_enable_global_isel(tm); + return tm; } static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info, - bool check_ir) + bool check_ir) { - LLVMPassManagerRef passmgr = LLVMCreatePassManager(); - if (!passmgr) - return NULL; - - if (target_library_info) - LLVMAddTargetLibraryInfo(target_library_info, - passmgr); - - if (check_ir) - LLVMAddVerifierPass(passmgr); - LLVMAddAlwaysInlinerPass(passmgr); - /* Normally, the pass manager runs all passes on one function before - * moving onto another. Adding a barrier no-op pass forces the pass - * manager to run the inliner on all functions first, which makes sure - * that the following passes are only run on the remaining non-inline - * function, so it removes useless work done on dead inline functions. - */ - ac_llvm_add_barrier_noop_pass(passmgr); - /* This pass should eliminate all the load and store instructions. */ - LLVMAddPromoteMemoryToRegisterPass(passmgr); - LLVMAddScalarReplAggregatesPass(passmgr); - LLVMAddLICMPass(passmgr); - LLVMAddAggressiveDCEPass(passmgr); - LLVMAddCFGSimplificationPass(passmgr); - /* This is recommended by the instruction combining pass. 
*/ - LLVMAddEarlyCSEMemSSAPass(passmgr); - LLVMAddInstructionCombiningPass(passmgr); - return passmgr; + LLVMPassManagerRef passmgr = LLVMCreatePassManager(); + if (!passmgr) + return NULL; + + if (target_library_info) + LLVMAddTargetLibraryInfo(target_library_info, passmgr); + + if (check_ir) + LLVMAddVerifierPass(passmgr); + LLVMAddAlwaysInlinerPass(passmgr); + /* Normally, the pass manager runs all passes on one function before + * moving onto another. Adding a barrier no-op pass forces the pass + * manager to run the inliner on all functions first, which makes sure + * that the following passes are only run on the remaining non-inline + * function, so it removes useless work done on dead inline functions. + */ + ac_llvm_add_barrier_noop_pass(passmgr); + /* This pass should eliminate all the load and store instructions. */ + LLVMAddPromoteMemoryToRegisterPass(passmgr); + LLVMAddScalarReplAggregatesPass(passmgr); + LLVMAddLICMPass(passmgr); + LLVMAddAggressiveDCEPass(passmgr); + LLVMAddCFGSimplificationPass(passmgr); + /* This is recommended by the instruction combining pass. 
*/ + LLVMAddEarlyCSEMemSSAPass(passmgr); + LLVMAddInstructionCombiningPass(passmgr); + return passmgr; } static const char *attr_to_str(enum ac_func_attr attr) { switch (attr) { - case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; - case AC_FUNC_ATTR_INREG: return "inreg"; - case AC_FUNC_ATTR_NOALIAS: return "noalias"; - case AC_FUNC_ATTR_NOUNWIND: return "nounwind"; - case AC_FUNC_ATTR_READNONE: return "readnone"; - case AC_FUNC_ATTR_READONLY: return "readonly"; - case AC_FUNC_ATTR_WRITEONLY: return "writeonly"; - case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly"; - case AC_FUNC_ATTR_CONVERGENT: return "convergent"; + case AC_FUNC_ATTR_ALWAYSINLINE: + return "alwaysinline"; + case AC_FUNC_ATTR_INREG: + return "inreg"; + case AC_FUNC_ATTR_NOALIAS: + return "noalias"; + case AC_FUNC_ATTR_NOUNWIND: + return "nounwind"; + case AC_FUNC_ATTR_READNONE: + return "readnone"; + case AC_FUNC_ATTR_READONLY: + return "readonly"; + case AC_FUNC_ATTR_WRITEONLY: + return "writeonly"; + case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: + return "inaccessiblememonly"; + case AC_FUNC_ATTR_CONVERGENT: + return "convergent"; default: - fprintf(stderr, "Unhandled function attribute: %x\n", attr); - return 0; + fprintf(stderr, "Unhandled function attribute: %x\n", attr); + return 0; } } -void -ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, - int attr_idx, enum ac_func_attr attr) +void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx, + enum ac_func_attr attr) { const char *attr_name = attr_to_str(attr); - unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, - strlen(attr_name)); + unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name)); LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0); if (LLVMIsAFunction(function)) @@ -282,138 +284,124 @@ LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr); } -void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, - 
unsigned attrib_mask) +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask) { - attrib_mask |= AC_FUNC_ATTR_NOUNWIND; - attrib_mask &= ~AC_FUNC_ATTR_LEGACY; + attrib_mask |= AC_FUNC_ATTR_NOUNWIND; + attrib_mask &= ~AC_FUNC_ATTR_LEGACY; - while (attrib_mask) { - enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); - ac_add_function_attr(ctx, function, -1, attr); - } + while (attrib_mask) { + enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); + ac_add_function_attr(ctx, function, -1, attr); + } } -void -ac_dump_module(LLVMModuleRef module) +void ac_dump_module(LLVMModuleRef module) { - char *str = LLVMPrintModuleToString(module); - fprintf(stderr, "%s", str); - LLVMDisposeMessage(str); + char *str = LLVMPrintModuleToString(module); + fprintf(stderr, "%s", str); + LLVMDisposeMessage(str); } -void -ac_llvm_add_target_dep_function_attr(LLVMValueRef F, - const char *name, unsigned value) +void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value) { - char str[16]; + char str[16]; - snprintf(str, sizeof(str), "0x%x", value); - LLVMAddTargetDependentFunctionAttr(F, name, str); + snprintf(str, sizeof(str), "0x%x", value); + LLVMAddTargetDependentFunctionAttr(F, name, str); } void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size) { - if (!size) - return; + if (!size) + return; + + char str[32]; + snprintf(str, sizeof(str), "%u,%u", size, size); + LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str); +} + +unsigned ac_count_scratch_private_memory(LLVMValueRef function) +{ + unsigned private_mem_vgprs = 0; + + /* Process all LLVM instructions. 
*/ + LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function); + while (bb) { + LLVMValueRef next = LLVMGetFirstInstruction(bb); + + while (next) { + LLVMValueRef inst = next; + next = LLVMGetNextInstruction(next); + + if (LLVMGetInstructionOpcode(inst) != LLVMAlloca) + continue; + + LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst)); + /* No idea why LLVM aligns allocas to 4 elements. */ + unsigned alignment = LLVMGetAlignment(inst); + unsigned dw_size = align(ac_get_type_size(type) / 4, alignment); + private_mem_vgprs += dw_size; + } + bb = LLVMGetNextBasicBlock(bb); + } + + return private_mem_vgprs; +} + +bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family, + enum ac_target_machine_options tm_options) +{ + const char *triple; + memset(compiler, 0, sizeof(*compiler)); + + compiler->tm = ac_create_target_machine(family, tm_options, LLVMCodeGenLevelDefault, &triple); + if (!compiler->tm) + return false; + + if (tm_options & AC_TM_CREATE_LOW_OPT) { + compiler->low_opt_tm = + ac_create_target_machine(family, tm_options, LLVMCodeGenLevelLess, NULL); + if (!compiler->low_opt_tm) + goto fail; + } + + if (family >= CHIP_NAVI10) { + assert(!(tm_options & AC_TM_CREATE_LOW_OPT)); + compiler->tm_wave32 = + ac_create_target_machine(family, tm_options | AC_TM_WAVE32, LLVMCodeGenLevelDefault, NULL); + if (!compiler->tm_wave32) + goto fail; + } - char str[32]; - snprintf(str, sizeof(str), "%u,%u", size, size); - LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str); -} - -unsigned -ac_count_scratch_private_memory(LLVMValueRef function) -{ - unsigned private_mem_vgprs = 0; - - /* Process all LLVM instructions. 
*/ - LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function); - while (bb) { - LLVMValueRef next = LLVMGetFirstInstruction(bb); - - while (next) { - LLVMValueRef inst = next; - next = LLVMGetNextInstruction(next); - - if (LLVMGetInstructionOpcode(inst) != LLVMAlloca) - continue; - - LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst)); - /* No idea why LLVM aligns allocas to 4 elements. */ - unsigned alignment = LLVMGetAlignment(inst); - unsigned dw_size = align(ac_get_type_size(type) / 4, alignment); - private_mem_vgprs += dw_size; - } - bb = LLVMGetNextBasicBlock(bb); - } - - return private_mem_vgprs; -} - -bool -ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, - enum radeon_family family, - enum ac_target_machine_options tm_options) -{ - const char *triple; - memset(compiler, 0, sizeof(*compiler)); - - compiler->tm = ac_create_target_machine(family, tm_options, - LLVMCodeGenLevelDefault, - &triple); - if (!compiler->tm) - return false; - - if (tm_options & AC_TM_CREATE_LOW_OPT) { - compiler->low_opt_tm = - ac_create_target_machine(family, tm_options, - LLVMCodeGenLevelLess, NULL); - if (!compiler->low_opt_tm) - goto fail; - } - - if (family >= CHIP_NAVI10) { - assert(!(tm_options & AC_TM_CREATE_LOW_OPT)); - compiler->tm_wave32 = ac_create_target_machine(family, - tm_options | AC_TM_WAVE32, - LLVMCodeGenLevelDefault, - NULL); - if (!compiler->tm_wave32) - goto fail; - } - - compiler->target_library_info = - ac_create_target_library_info(triple); - if (!compiler->target_library_info) - goto fail; - - compiler->passmgr = ac_create_passmgr(compiler->target_library_info, - tm_options & AC_TM_CHECK_IR); - if (!compiler->passmgr) - goto fail; + compiler->target_library_info = ac_create_target_library_info(triple); + if (!compiler->target_library_info) + goto fail; + + compiler->passmgr = + ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR); + if (!compiler->passmgr) + goto fail; - return true; + return true; fail: - 
ac_destroy_llvm_compiler(compiler); - return false; + ac_destroy_llvm_compiler(compiler); + return false; } -void -ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) +void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) { - ac_destroy_llvm_passes(compiler->passes); - ac_destroy_llvm_passes(compiler->passes_wave32); - ac_destroy_llvm_passes(compiler->low_opt_passes); - - if (compiler->passmgr) - LLVMDisposePassManager(compiler->passmgr); - if (compiler->target_library_info) - ac_dispose_target_library_info(compiler->target_library_info); - if (compiler->low_opt_tm) - LLVMDisposeTargetMachine(compiler->low_opt_tm); - if (compiler->tm) - LLVMDisposeTargetMachine(compiler->tm); - if (compiler->tm_wave32) - LLVMDisposeTargetMachine(compiler->tm_wave32); + ac_destroy_llvm_passes(compiler->passes); + ac_destroy_llvm_passes(compiler->passes_wave32); + ac_destroy_llvm_passes(compiler->low_opt_passes); + + if (compiler->passmgr) + LLVMDisposePassManager(compiler->passmgr); + if (compiler->target_library_info) + ac_dispose_target_library_info(compiler->target_library_info); + if (compiler->low_opt_tm) + LLVMDisposeTargetMachine(compiler->low_opt_tm); + if (compiler->tm) + LLVMDisposeTargetMachine(compiler->tm); + if (compiler->tm_wave32) + LLVMDisposeTargetMachine(compiler->tm_wave32); } diff -Nru mesa-20.2.1/src/amd/llvm/ac_llvm_util.h mesa-20.2.6/src/amd/llvm/ac_llvm_util.h --- mesa-20.2.1/src/amd/llvm/ac_llvm_util.h 2020-10-14 17:19:10.293182800 +0000 +++ mesa-20.2.6/src/amd/llvm/ac_llvm_util.h 2020-12-16 21:42:03.544110000 +0000 @@ -26,11 +26,11 @@ #ifndef AC_LLVM_UTIL_H #define AC_LLVM_UTIL_H -#include +#include "amd_family.h" #include #include -#include "amd_family.h" +#include #ifdef __cplusplus extern "C" { @@ -38,122 +38,115 @@ struct ac_compiler_passes; -enum ac_func_attr { - AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0), - AC_FUNC_ATTR_INREG = (1 << 2), - AC_FUNC_ATTR_NOALIAS = (1 << 3), - AC_FUNC_ATTR_NOUNWIND = (1 << 4), - AC_FUNC_ATTR_READNONE = (1 
<< 5), - AC_FUNC_ATTR_READONLY = (1 << 6), - AC_FUNC_ATTR_WRITEONLY = (1 << 7), - AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8), - AC_FUNC_ATTR_CONVERGENT = (1 << 9), - - /* Legacy intrinsic that needs attributes on function declarations - * and they must match the internal LLVM definition exactly, otherwise - * intrinsic selection fails. - */ - AC_FUNC_ATTR_LEGACY = (1u << 31), +enum ac_func_attr +{ + AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0), + AC_FUNC_ATTR_INREG = (1 << 2), + AC_FUNC_ATTR_NOALIAS = (1 << 3), + AC_FUNC_ATTR_NOUNWIND = (1 << 4), + AC_FUNC_ATTR_READNONE = (1 << 5), + AC_FUNC_ATTR_READONLY = (1 << 6), + AC_FUNC_ATTR_WRITEONLY = (1 << 7), + AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8), + AC_FUNC_ATTR_CONVERGENT = (1 << 9), + + /* Legacy intrinsic that needs attributes on function declarations + * and they must match the internal LLVM definition exactly, otherwise + * intrinsic selection fails. + */ + AC_FUNC_ATTR_LEGACY = (1u << 31), }; -enum ac_target_machine_options { - AC_TM_SUPPORTS_SPILL = (1 << 0), - AC_TM_FORCE_ENABLE_XNACK = (1 << 1), - AC_TM_FORCE_DISABLE_XNACK = (1 << 2), - AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3), - AC_TM_CHECK_IR = (1 << 4), - AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5), - AC_TM_CREATE_LOW_OPT = (1 << 6), - AC_TM_WAVE32 = (1 << 7), +enum ac_target_machine_options +{ + AC_TM_SUPPORTS_SPILL = (1 << 0), + AC_TM_FORCE_ENABLE_XNACK = (1 << 1), + AC_TM_FORCE_DISABLE_XNACK = (1 << 2), + AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3), + AC_TM_CHECK_IR = (1 << 4), + AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5), + AC_TM_CREATE_LOW_OPT = (1 << 6), + AC_TM_WAVE32 = (1 << 7), }; -enum ac_float_mode { - AC_FLOAT_MODE_DEFAULT, - AC_FLOAT_MODE_DEFAULT_OPENGL, - AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO, +enum ac_float_mode +{ + AC_FLOAT_MODE_DEFAULT, + AC_FLOAT_MODE_DEFAULT_OPENGL, + AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO, }; /* Per-thread persistent LLVM objects. 
*/ struct ac_llvm_compiler { - LLVMTargetLibraryInfoRef target_library_info; - LLVMPassManagerRef passmgr; + LLVMTargetLibraryInfoRef target_library_info; + LLVMPassManagerRef passmgr; - /* Default compiler. */ - LLVMTargetMachineRef tm; - struct ac_compiler_passes *passes; - - /* Wave32 compiler for GFX10. */ - LLVMTargetMachineRef tm_wave32; - struct ac_compiler_passes *passes_wave32; - - /* Optional compiler for faster compilation with fewer optimizations. - * LLVM modules can be created with "tm" too. There is no difference. - */ - LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */ - struct ac_compiler_passes *low_opt_passes; + /* Default compiler. */ + LLVMTargetMachineRef tm; + struct ac_compiler_passes *passes; + + /* Wave32 compiler for GFX10. */ + LLVMTargetMachineRef tm_wave32; + struct ac_compiler_passes *passes_wave32; + + /* Optional compiler for faster compilation with fewer optimizations. + * LLVM modules can be created with "tm" too. There is no difference. + */ + LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */ + struct ac_compiler_passes *low_opt_passes; }; const char *ac_get_llvm_processor_name(enum radeon_family family); void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes); bool ac_is_sgpr_param(LLVMValueRef param); -void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, - int attr_idx, enum ac_func_attr attr); -void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, - unsigned attrib_mask); +void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx, + enum ac_func_attr attr); +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask); void ac_dump_module(LLVMModuleRef module); LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call); bool ac_llvm_is_function(LLVMValueRef v); LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx); 
-LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, - enum ac_float_mode float_mode); +LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode); -void -ac_llvm_add_target_dep_function_attr(LLVMValueRef F, - const char *name, unsigned value); +void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value); void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size); -static inline unsigned -ac_get_load_intr_attribs(bool can_speculate) +static inline unsigned ac_get_load_intr_attribs(bool can_speculate) { - /* READNONE means writes can't affect it, while READONLY means that - * writes can affect it. */ - return can_speculate ? AC_FUNC_ATTR_READNONE : - AC_FUNC_ATTR_READONLY; + /* READNONE means writes can't affect it, while READONLY means that + * writes can affect it. */ + return can_speculate ? AC_FUNC_ATTR_READNONE : AC_FUNC_ATTR_READONLY; } -unsigned -ac_count_scratch_private_memory(LLVMValueRef function); +unsigned ac_count_scratch_private_memory(LLVMValueRef function); LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple); void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info); void ac_init_shared_llvm_once(void); /* Do not use directly, use ac_init_llvm_once */ void ac_init_llvm_once(void); - -bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, - enum radeon_family family, - enum ac_target_machine_options tm_options); +bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family, + enum ac_target_machine_options tm_options); void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler); struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm); void ac_destroy_llvm_passes(struct ac_compiler_passes *p); bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module, - char **pelf_buffer, size_t *pelf_size); + char **pelf_buffer, size_t *pelf_size); void 
ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr); void ac_enable_global_isel(LLVMTargetMachineRef tm); -static inline bool -ac_has_vec3_support(enum chip_class chip, bool use_format) +static inline bool ac_has_vec3_support(enum chip_class chip, bool use_format) { - if (chip == GFX6 && !use_format) { - /* GFX6 only supports vec3 with load/store format. */ - return false; - } + if (chip == GFX6 && !use_format) { + /* GFX6 only supports vec3 with load/store format. */ + return false; + } - return LLVM_VERSION_MAJOR >= 9; + return LLVM_VERSION_MAJOR >= 9; } #ifdef __cplusplus diff -Nru mesa-20.2.1/src/amd/llvm/ac_nir_to_llvm.c mesa-20.2.6/src/amd/llvm/ac_nir_to_llvm.c --- mesa-20.2.1/src/amd/llvm/ac_nir_to_llvm.c 2020-10-14 17:19:10.296516200 +0000 +++ mesa-20.2.6/src/amd/llvm/ac_nir_to_llvm.c 2020-12-16 21:42:03.544110000 +0000 @@ -21,496 +21,447 @@ * IN THE SOFTWARE. */ -#include - #include "ac_nir_to_llvm.h" + +#include "ac_binary.h" #include "ac_llvm_build.h" #include "ac_llvm_util.h" -#include "ac_binary.h" -#include "sid.h" +#include "ac_shader_abi.h" +#include "ac_shader_util.h" #include "nir/nir.h" #include "nir/nir_deref.h" +#include "sid.h" #include "util/bitscan.h" #include "util/u_math.h" -#include "ac_shader_abi.h" -#include "ac_shader_util.h" +#include struct ac_nir_context { - struct ac_llvm_context ac; - struct ac_shader_abi *abi; - const struct ac_shader_args *args; + struct ac_llvm_context ac; + struct ac_shader_abi *abi; + const struct ac_shader_args *args; - gl_shader_stage stage; - shader_info *info; + gl_shader_stage stage; + shader_info *info; - LLVMValueRef *ssa_defs; + LLVMValueRef *ssa_defs; - LLVMValueRef scratch; - LLVMValueRef constant_data; + LLVMValueRef scratch; + LLVMValueRef constant_data; - struct hash_table *defs; - struct hash_table *phis; - struct hash_table *vars; - struct hash_table *verified_interp; + struct hash_table *defs; + struct hash_table *phis; + struct hash_table *vars; + struct hash_table *verified_interp; - 
LLVMValueRef main_function; - LLVMBasicBlockRef continue_block; - LLVMBasicBlockRef break_block; + LLVMValueRef main_function; + LLVMBasicBlockRef continue_block; + LLVMBasicBlockRef break_block; - int num_locals; - LLVMValueRef *locals; + int num_locals; + LLVMValueRef *locals; }; -static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, - nir_deref_instr *deref_instr, - const nir_instr *instr, - bool image); - -static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, - nir_deref_instr *deref_instr, - enum ac_descriptor_type desc_type, - const nir_instr *instr, - LLVMValueRef index, - bool image, bool write); - -static void -build_store_values_extended(struct ac_llvm_context *ac, - LLVMValueRef *values, - unsigned value_count, - unsigned value_stride, - LLVMValueRef vec) -{ - LLVMBuilderRef builder = ac->builder; - unsigned i; - - for (i = 0; i < value_count; i++) { - LLVMValueRef ptr = values[i * value_stride]; - LLVMValueRef index = LLVMConstInt(ac->i32, i, false); - LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, ""); - LLVMBuildStore(builder, value, ptr); - } -} - -static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, - const nir_ssa_def *def) -{ - LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size); - if (def->num_components > 1) { - type = LLVMVectorType(type, def->num_components); - } - return type; +static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr, + const nir_instr *instr, bool image); + +static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr, + enum ac_descriptor_type desc_type, const nir_instr *instr, + LLVMValueRef index, bool image, bool write); + +static void build_store_values_extended(struct ac_llvm_context *ac, LLVMValueRef *values, + unsigned value_count, unsigned value_stride, + LLVMValueRef vec) +{ + LLVMBuilderRef builder = ac->builder; + unsigned i; + + for (i = 0; i < value_count; i++) { 
+ LLVMValueRef ptr = values[i * value_stride]; + LLVMValueRef index = LLVMConstInt(ac->i32, i, false); + LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, ""); + LLVMBuildStore(builder, value, ptr); + } +} + +static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_ssa_def *def) +{ + LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size); + if (def->num_components > 1) { + type = LLVMVectorType(type, def->num_components); + } + return type; } static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src) { - assert(src.is_ssa); - return nir->ssa_defs[src.ssa->index]; + assert(src.is_ssa); + return nir->ssa_defs[src.ssa->index]; } -static LLVMValueRef -get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size) +static LLVMValueRef get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size) { - LLVMValueRef ptr = get_src(ctx, src); - ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, ""); - int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); + LLVMValueRef ptr = get_src(ctx, src); + ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, ""); + int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); - LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size); + LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size); - return LLVMBuildBitCast(ctx->ac.builder, ptr, - LLVMPointerType(type, addr_space), ""); + return LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(type, addr_space), ""); } -static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, - const struct nir_block *b) +static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, const struct nir_block *b) { - struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b); - return (LLVMBasicBlockRef)entry->data; + struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b); + return (LLVMBasicBlockRef)entry->data; } -static LLVMValueRef get_alu_src(struct 
ac_nir_context *ctx, - nir_alu_src src, +static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, nir_alu_src src, unsigned num_components) { - LLVMValueRef value = get_src(ctx, src.src); - bool need_swizzle = false; + LLVMValueRef value = get_src(ctx, src.src); + bool need_swizzle = false; + + assert(value); + unsigned src_components = ac_get_llvm_num_components(value); + for (unsigned i = 0; i < num_components; ++i) { + assert(src.swizzle[i] < src_components); + if (src.swizzle[i] != i) + need_swizzle = true; + } + + if (need_swizzle || num_components != src_components) { + LLVMValueRef masks[] = {LLVMConstInt(ctx->ac.i32, src.swizzle[0], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[1], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[2], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)}; + + if (src_components > 1 && num_components == 1) { + value = LLVMBuildExtractElement(ctx->ac.builder, value, masks[0], ""); + } else if (src_components == 1 && num_components > 1) { + LLVMValueRef values[] = {value, value, value, value}; + value = ac_build_gather_values(&ctx->ac, values, num_components); + } else { + LLVMValueRef swizzle = LLVMConstVector(masks, num_components); + value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, swizzle, ""); + } + } + assert(!src.negate); + assert(!src.abs); + return value; +} + +static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, LLVMIntPredicate pred, + LLVMValueRef src0, LLVMValueRef src1) +{ + LLVMTypeRef src0_type = LLVMTypeOf(src0); + LLVMTypeRef src1_type = LLVMTypeOf(src1); + + if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind && + LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) { + src1 = LLVMBuildIntToPtr(ctx->builder, src1, src0_type, ""); + } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind && + LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) { + src0 = LLVMBuildIntToPtr(ctx->builder, src0, src1_type, ""); + } + + LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, 
src0, src1, ""); + return LLVMBuildSelect(ctx->builder, result, LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), + ctx->i32_0, ""); +} + +static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, LLVMRealPredicate pred, + LLVMValueRef src0, LLVMValueRef src1) +{ + LLVMValueRef result; + src0 = ac_to_float(ctx, src0); + src1 = ac_to_float(ctx, src1); + result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, ""); + return LLVMBuildSelect(ctx->builder, result, LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), + ctx->i32_0, ""); +} + +static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, const char *intrin, + LLVMTypeRef result_type, LLVMValueRef src0) +{ + char name[64], type[64]; + LLVMValueRef params[] = { + ac_to_float(ctx, src0), + }; + + ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); + assert(length < sizeof(name)); + return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); +} + +static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, const char *intrin, + LLVMTypeRef result_type, LLVMValueRef src0, + LLVMValueRef src1) +{ + char name[64], type[64]; + LLVMValueRef params[] = { + ac_to_float(ctx, src0), + ac_to_float(ctx, src1), + }; + + ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); + assert(length < sizeof(name)); + return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); +} + +static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, const char *intrin, + LLVMTypeRef result_type, LLVMValueRef src0, + LLVMValueRef src1, LLVMValueRef src2) +{ + char name[64], type[64]; + LLVMValueRef params[] = { + ac_to_float(ctx, src0), + ac_to_float(ctx, src1), + ac_to_float(ctx, src2), + }; + + ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED 
const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); + assert(length < sizeof(name)); + return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE); +} + +static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1, + LLVMValueRef src2) +{ + LLVMTypeRef src1_type = LLVMTypeOf(src1); + LLVMTypeRef src2_type = LLVMTypeOf(src2); + + assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind); + + if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind && + LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) { + src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, ""); + } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind && + LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) { + src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, ""); + } + + LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, ctx->i32_0, ""); + return LLVMBuildSelect(ctx->builder, v, ac_to_integer_or_pointer(ctx, src1), + ac_to_integer_or_pointer(ctx, src2), ""); +} + +static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, LLVMValueRef src0) +{ + return ac_build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, "")); +} + +static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, const char *intrin, + LLVMValueRef src0, LLVMValueRef src1) +{ + LLVMTypeRef ret_type; + LLVMTypeRef types[] = {ctx->i32, ctx->i1}; + LLVMValueRef res; + LLVMValueRef params[] = {src0, src1}; + ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true); + + res = ac_build_intrinsic(ctx, intrin, ret_type, params, 2, AC_FUNC_ATTR_READNONE); + + res = LLVMBuildExtractValue(ctx->builder, res, 1, ""); + res = LLVMBuildZExt(ctx->builder, res, ctx->i32, ""); + return res; +} + +static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) +{ + LLVMValueRef result = + LLVMBuildAnd(ctx->builder, src0, + LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), 
""); + result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, ""); + + switch (bitsize) { + case 16: + return LLVMBuildFPTrunc(ctx->builder, result, ctx->f16, ""); + case 32: + return result; + case 64: + return LLVMBuildFPExt(ctx->builder, result, ctx->f64, ""); + default: + unreachable("Unsupported bit size."); + } +} + +static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, LLVMValueRef src0) +{ + src0 = ac_to_float(ctx, src0); + LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0)); + return LLVMBuildSExt(ctx->builder, LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""), + ctx->i32, ""); +} + +static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) +{ + LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, ""); + + switch (bitsize) { + case 8: + return LLVMBuildTrunc(ctx->builder, result, ctx->i8, ""); + case 16: + return LLVMBuildTrunc(ctx->builder, result, ctx->i16, ""); + case 32: + return result; + case 64: + return LLVMBuildZExt(ctx->builder, result, ctx->i64, ""); + default: + unreachable("Unsupported bit size."); + } +} + +static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, LLVMValueRef src0) +{ + LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0)); + return LLVMBuildSExt(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""), + ctx->i32, ""); +} + +static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, LLVMValueRef src0) +{ + LLVMValueRef result; + LLVMValueRef cond = NULL; + + src0 = ac_to_float(ctx, src0); + result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, ""); + + if (ctx->chip_class >= GFX8) { + LLVMValueRef args[2]; + /* Check if the result is a denormal - and flush to 0 if so. 
*/ + args[0] = result; + args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false); + cond = + ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE); + } + + /* need to convert back up to f32 */ + result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); + + if (ctx->chip_class >= GFX8) + result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, ""); + else { + /* for GFX6-GFX7 */ + /* 0x38800000 is smallest half float value (2^-14) in 32-bit float, + * so compare the result and flush to 0 if it's smaller. + */ + LLVMValueRef temp, cond2; + temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result); + cond = LLVMBuildFCmp( + ctx->builder, LLVMRealOGT, + LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""), + temp, ""); + cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealONE, temp, ctx->f32_0, ""); + cond = LLVMBuildAnd(ctx->builder, cond, cond2, ""); + result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, ""); + } + return result; +} + +static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, LLVMValueRef src0, + LLVMValueRef src1) +{ + LLVMValueRef dst64, result; + src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, ""); + src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, ""); - assert(value); - unsigned src_components = ac_get_llvm_num_components(value); - for (unsigned i = 0; i < num_components; ++i) { - assert(src.swizzle[i] < src_components); - if (src.swizzle[i] != i) - need_swizzle = true; - } - - if (need_swizzle || num_components != src_components) { - LLVMValueRef masks[] = { - LLVMConstInt(ctx->ac.i32, src.swizzle[0], false), - LLVMConstInt(ctx->ac.i32, src.swizzle[1], false), - LLVMConstInt(ctx->ac.i32, src.swizzle[2], false), - LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)}; - - if (src_components > 1 && num_components == 1) { - value = LLVMBuildExtractElement(ctx->ac.builder, value, - masks[0], ""); - } else if (src_components == 1 && 
num_components > 1) { - LLVMValueRef values[] = {value, value, value, value}; - value = ac_build_gather_values(&ctx->ac, values, num_components); - } else { - LLVMValueRef swizzle = LLVMConstVector(masks, num_components); - value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, - swizzle, ""); - } - } - assert(!src.negate); - assert(!src.abs); - return value; -} - -static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, - LLVMIntPredicate pred, LLVMValueRef src0, - LLVMValueRef src1) -{ - LLVMTypeRef src0_type = LLVMTypeOf(src0); - LLVMTypeRef src1_type = LLVMTypeOf(src1); - - if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind && - LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) { - src1 = LLVMBuildIntToPtr(ctx->builder, src1, src0_type, ""); - } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind && - LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) { - src0 = LLVMBuildIntToPtr(ctx->builder, src0, src1_type, ""); - } - - LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, ""); - return LLVMBuildSelect(ctx->builder, result, - LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), - ctx->i32_0, ""); + dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); + dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); + result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); + return result; } -static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, - LLVMRealPredicate pred, LLVMValueRef src0, +static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1) { - LLVMValueRef result; - src0 = ac_to_float(ctx, src0); - src1 = ac_to_float(ctx, src1); - result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, ""); - return LLVMBuildSelect(ctx->builder, result, - LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), - ctx->i32_0, ""); -} - -static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, - const char *intrin, - LLVMTypeRef result_type, - LLVMValueRef src0) -{ - 
char name[64], type[64]; - LLVMValueRef params[] = { - ac_to_float(ctx, src0), - }; - - ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); - ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); - assert(length < sizeof(name)); - return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); -} - -static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, - const char *intrin, - LLVMTypeRef result_type, - LLVMValueRef src0, LLVMValueRef src1) -{ - char name[64], type[64]; - LLVMValueRef params[] = { - ac_to_float(ctx, src0), - ac_to_float(ctx, src1), - }; - - ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); - ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); - assert(length < sizeof(name)); - return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); -} - -static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, - const char *intrin, - LLVMTypeRef result_type, - LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) -{ - char name[64], type[64]; - LLVMValueRef params[] = { - ac_to_float(ctx, src0), - ac_to_float(ctx, src1), - ac_to_float(ctx, src2), - }; - - ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); - ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); - assert(length < sizeof(name)); - return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE); -} - -static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, - LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) -{ - LLVMTypeRef src1_type = LLVMTypeOf(src1); - LLVMTypeRef src2_type = LLVMTypeOf(src2); - - assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind); - - if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind && - LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) { - src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, ""); - } else 
if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind && - LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) { - src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, ""); - } - - LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, - ctx->i32_0, ""); - return LLVMBuildSelect(ctx->builder, v, - ac_to_integer_or_pointer(ctx, src1), - ac_to_integer_or_pointer(ctx, src2), ""); -} - -static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - return ac_build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, "")); -} - -static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, - const char *intrin, - LLVMValueRef src0, LLVMValueRef src1) -{ - LLVMTypeRef ret_type; - LLVMTypeRef types[] = { ctx->i32, ctx->i1 }; - LLVMValueRef res; - LLVMValueRef params[] = { src0, src1 }; - ret_type = LLVMStructTypeInContext(ctx->context, types, - 2, true); - - res = ac_build_intrinsic(ctx, intrin, ret_type, - params, 2, AC_FUNC_ATTR_READNONE); - - res = LLVMBuildExtractValue(ctx->builder, res, 1, ""); - res = LLVMBuildZExt(ctx->builder, res, ctx->i32, ""); - return res; -} - -static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, - LLVMValueRef src0, - unsigned bitsize) -{ - LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, - LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), - ""); - result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, ""); - - switch (bitsize) { - case 16: - return LLVMBuildFPTrunc(ctx->builder, result, ctx->f16, ""); - case 32: - return result; - case 64: - return LLVMBuildFPExt(ctx->builder, result, ctx->f64, ""); - default: - unreachable("Unsupported bit size."); - } -} - -static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - src0 = ac_to_float(ctx, src0); - LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0)); - return LLVMBuildSExt(ctx->builder, - LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""), - ctx->i32, ""); -} - -static LLVMValueRef 
emit_b2i(struct ac_llvm_context *ctx, - LLVMValueRef src0, - unsigned bitsize) -{ - LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, ""); - - switch (bitsize) { - case 8: - return LLVMBuildTrunc(ctx->builder, result, ctx->i8, ""); - case 16: - return LLVMBuildTrunc(ctx->builder, result, ctx->i16, ""); - case 32: - return result; - case 64: - return LLVMBuildZExt(ctx->builder, result, ctx->i64, ""); - default: - unreachable("Unsupported bit size."); - } -} - -static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0)); - return LLVMBuildSExt(ctx->builder, - LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""), - ctx->i32, ""); -} - -static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - LLVMValueRef result; - LLVMValueRef cond = NULL; - - src0 = ac_to_float(ctx, src0); - result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, ""); - - if (ctx->chip_class >= GFX8) { - LLVMValueRef args[2]; - /* Check if the result is a denormal - and flush to 0 if so. */ - args[0] = result; - args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false); - cond = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE); - } - - /* need to convert back up to f32 */ - result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); - - if (ctx->chip_class >= GFX8) - result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, ""); - else { - /* for GFX6-GFX7 */ - /* 0x38800000 is smallest half float value (2^-14) in 32-bit float, - * so compare the result and flush to 0 if it's smaller. 
- */ - LLVMValueRef temp, cond2; - temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result); - cond = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, - LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""), - temp, ""); - cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealONE, - temp, ctx->f32_0, ""); - cond = LLVMBuildAnd(ctx->builder, cond, cond2, ""); - result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, ""); - } - return result; -} - -static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, - LLVMValueRef src0, LLVMValueRef src1) -{ - LLVMValueRef dst64, result; - src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, ""); - src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, ""); - - dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); - dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); - result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); - return result; -} - -static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, - LLVMValueRef src0, LLVMValueRef src1) -{ - LLVMValueRef dst64, result; - src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, ""); - src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, ""); - - dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); - dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); - result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); - return result; -} - -static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx, - LLVMValueRef bits, LLVMValueRef offset) -{ - /* mask = ((1 << bits) - 1) << offset */ - return LLVMBuildShl(ctx->builder, - LLVMBuildSub(ctx->builder, - LLVMBuildShl(ctx->builder, - ctx->i32_1, - bits, ""), - ctx->i32_1, ""), - offset, ""); -} - -static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx, - LLVMValueRef mask, LLVMValueRef insert, - LLVMValueRef base) -{ - /* Calculate: - * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base)) - * Use the right-hand side, 
which the LLVM backend can convert to V_BFI. - */ - return LLVMBuildXor(ctx->builder, base, - LLVMBuildAnd(ctx->builder, mask, - LLVMBuildXor(ctx->builder, insert, base, ""), ""), ""); -} - -static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx, - LLVMValueRef src0, - LLVMValueRef (*pack)(struct ac_llvm_context *ctx, - LLVMValueRef args[2])) -{ - LLVMValueRef comp[2]; - - src0 = ac_to_float(ctx, src0); - comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, ""); - comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, ""); - - return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, ""); -} - -static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); - LLVMValueRef temps[2], val; - int i; - - for (i = 0; i < 2; i++) { - val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0; - val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, ""); - val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, ""); - temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, ""); - } - return ac_build_gather_values(ctx, temps, 2); -} - -static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, - nir_op op, - LLVMValueRef src0) -{ - unsigned mask; - int idx; - LLVMValueRef result; - - if (op == nir_op_fddx_fine) - mask = AC_TID_MASK_LEFT; - else if (op == nir_op_fddy_fine) - mask = AC_TID_MASK_TOP; - else - mask = AC_TID_MASK_TOP_LEFT; - - /* for DDX we want to next X pixel, DDY next Y pixel. 
*/ - if (op == nir_op_fddx_fine || - op == nir_op_fddx_coarse || - op == nir_op_fddx) - idx = 1; - else - idx = 2; + LLVMValueRef dst64, result; + src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, ""); + src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, ""); + + dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); + dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); + result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); + return result; +} - result = ac_build_ddxy(&ctx->ac, mask, idx, src0); - return result; +static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx, LLVMValueRef bits, LLVMValueRef offset) +{ + /* mask = ((1 << bits) - 1) << offset */ + return LLVMBuildShl( + ctx->builder, + LLVMBuildSub(ctx->builder, LLVMBuildShl(ctx->builder, ctx->i32_1, bits, ""), ctx->i32_1, ""), + offset, ""); +} + +static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx, LLVMValueRef mask, + LLVMValueRef insert, LLVMValueRef base) +{ + /* Calculate: + * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base)) + * Use the right-hand side, which the LLVM backend can convert to V_BFI. + */ + return LLVMBuildXor( + ctx->builder, base, + LLVMBuildAnd(ctx->builder, mask, LLVMBuildXor(ctx->builder, insert, base, ""), ""), ""); +} + +static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0, + LLVMValueRef (*pack)(struct ac_llvm_context *ctx, + LLVMValueRef args[2])) +{ + LLVMValueRef comp[2]; + + src0 = ac_to_float(ctx, src0); + comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, ""); + comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, ""); + + return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, ""); +} + +static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0) +{ + LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); + LLVMValueRef temps[2], val; + int i; + + for (i = 0; i < 2; i++) { + val = i == 1 ? 
LLVMBuildLShr(ctx->builder, src0, const16, "") : src0; + val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, ""); + val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, ""); + temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, ""); + } + return ac_build_gather_values(ctx, temps, 2); +} + +static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, nir_op op, LLVMValueRef src0) +{ + unsigned mask; + int idx; + LLVMValueRef result; + + if (op == nir_op_fddx_fine) + mask = AC_TID_MASK_LEFT; + else if (op == nir_op_fddy_fine) + mask = AC_TID_MASK_TOP; + else + mask = AC_TID_MASK_TOP_LEFT; + + /* for DDX we want to next X pixel, DDY next Y pixel. */ + if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx) + idx = 1; + else + idx = 2; + + result = ac_build_ddxy(&ctx->ac, mask, idx, src0); + return result; } struct waterfall_context { - LLVMBasicBlockRef phi_bb[2]; - bool use_waterfall; + LLVMBasicBlockRef phi_bb[2]; + bool use_waterfall; }; /* To deal with divergent descriptors we can create a loop that handles all @@ -519,7 +470,7 @@ * * These helper create the begin and end of the loop leaving the caller * to implement the body. - * + * * params: * - ctx is the usal nir context * - wctx is a temporary struct containing some loop info. Can be left uninitialized. @@ -527,784 +478,749 @@ * - divergent is whether value is actually divergent. If false we just pass * things through. */ -static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx, - struct waterfall_context *wctx, - LLVMValueRef value, bool divergent) +static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx, + LLVMValueRef value, bool divergent) { - /* If the app claims the value is divergent but it is constant we can - * end up with a dynamic index of NULL. */ - if (!value) - divergent = false; + /* If the app claims the value is divergent but it is constant we can + * end up with a dynamic index of NULL. 
*/ + if (!value) + divergent = false; - wctx->use_waterfall = divergent; - if (!divergent) - return value; + wctx->use_waterfall = divergent; + if (!divergent) + return value; - ac_build_bgnloop(&ctx->ac, 6000); + ac_build_bgnloop(&ctx->ac, 6000); - LLVMValueRef scalar_value = ac_build_readlane(&ctx->ac, value, NULL); + LLVMValueRef scalar_value = ac_build_readlane(&ctx->ac, value, NULL); - LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, value, - scalar_value, "uniform_active"); + LLVMValueRef active = + LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, value, scalar_value, "uniform_active"); - wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder); - ac_build_ifcc(&ctx->ac, active, 6001); + wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder); + ac_build_ifcc(&ctx->ac, active, 6001); - return scalar_value; + return scalar_value; } -static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, - struct waterfall_context *wctx, - LLVMValueRef value) +static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx, + LLVMValueRef value) { - LLVMValueRef ret = NULL; - LLVMValueRef phi_src[2]; - LLVMValueRef cc_phi_src[2] = { - LLVMConstInt(ctx->ac.i32, 0, false), - LLVMConstInt(ctx->ac.i32, 0xffffffff, false), - }; + LLVMValueRef ret = NULL; + LLVMValueRef phi_src[2]; + LLVMValueRef cc_phi_src[2] = { + LLVMConstInt(ctx->ac.i32, 0, false), + LLVMConstInt(ctx->ac.i32, 0xffffffff, false), + }; - if (!wctx->use_waterfall) - return value; + if (!wctx->use_waterfall) + return value; - wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder); + wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder); - ac_build_endif(&ctx->ac, 6001); + ac_build_endif(&ctx->ac, 6001); - if (value) { - phi_src[0] = LLVMGetUndef(LLVMTypeOf(value)); - phi_src[1] = value; + if (value) { + phi_src[0] = LLVMGetUndef(LLVMTypeOf(value)); + phi_src[1] = value; - ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb); - } + ret = 
ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb); + } - /* - * By using the optimization barrier on the exit decision, we decouple - * the operations from the break, and hence avoid LLVM hoisting the - * opteration into the break block. - */ - LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb); - ac_build_optimization_barrier(&ctx->ac, &cc); + /* + * By using the optimization barrier on the exit decision, we decouple + * the operations from the break, and hence avoid LLVM hoisting the + * opteration into the break block. + */ + LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb); + ac_build_optimization_barrier(&ctx->ac, &cc); - LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2"); - ac_build_ifcc(&ctx->ac, active, 6002); - ac_build_break(&ctx->ac); - ac_build_endif(&ctx->ac, 6002); + LLVMValueRef active = + LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2"); + ac_build_ifcc(&ctx->ac, active, 6002); + ac_build_break(&ctx->ac); + ac_build_endif(&ctx->ac, 6002); - ac_build_endloop(&ctx->ac, 6000); - return ret; + ac_build_endloop(&ctx->ac, 6000); + return ret; } static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) { - LLVMValueRef src[4], result = NULL; - unsigned num_components = instr->dest.dest.ssa.num_components; - unsigned src_components; - LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa); - - assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src)); - switch (instr->op) { - case nir_op_vec2: - case nir_op_vec3: - case nir_op_vec4: - src_components = 1; - break; - case nir_op_pack_half_2x16: - case nir_op_pack_snorm_2x16: - case nir_op_pack_unorm_2x16: - src_components = 2; - break; - case nir_op_unpack_half_2x16: - src_components = 1; - break; - case nir_op_cube_face_coord: - case nir_op_cube_face_index: - src_components = 3; - break; - default: - 
src_components = num_components; - break; - } - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) - src[i] = get_alu_src(ctx, instr->src[i], src_components); - - switch (instr->op) { - case nir_op_mov: - result = src[0]; - break; - case nir_op_fneg: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = LLVMBuildFNeg(ctx->ac.builder, src[0], ""); - if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) { - /* fneg will be optimized by backend compiler with sign - * bit removed via XOR. This is probably a LLVM bug. - */ - result = ac_build_canonicalize(&ctx->ac, result, - instr->dest.dest.ssa.bit_size); - } - break; - case nir_op_ineg: - result = LLVMBuildNeg(ctx->ac.builder, src[0], ""); - break; - case nir_op_inot: - result = LLVMBuildNot(ctx->ac.builder, src[0], ""); - break; - case nir_op_iadd: - result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_fadd: - src[0] = ac_to_float(&ctx->ac, src[0]); - src[1] = ac_to_float(&ctx->ac, src[1]); - result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_fsub: - src[0] = ac_to_float(&ctx->ac, src[0]); - src[1] = ac_to_float(&ctx->ac, src[1]); - result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_isub: - result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_imul: - result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_imod: - result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_umod: - result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_fmod: - /* lower_fmod only lower 16-bit and 32-bit fmod */ - assert(instr->dest.dest.ssa.bit_size == 64); - src[0] = ac_to_float(&ctx->ac, src[0]); - src[1] = ac_to_float(&ctx->ac, src[1]); - result = ac_build_fdiv(&ctx->ac, src[0], src[1]); - result = emit_intrin_1f_param(&ctx->ac, "llvm.floor", - ac_to_float_type(&ctx->ac, def_type), result); - result = 
LLVMBuildFMul(ctx->ac.builder, src[1] , result, ""); - result = LLVMBuildFSub(ctx->ac.builder, src[0], result, ""); - break; - case nir_op_irem: - result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_idiv: - result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_udiv: - result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_fmul: - src[0] = ac_to_float(&ctx->ac, src[0]); - src[1] = ac_to_float(&ctx->ac, src[1]); - result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_frcp: - /* For doubles, we need precise division to pass GLCTS. */ - if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && - ac_get_type_size(def_type) == 8) { - result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1, - ac_to_float(&ctx->ac, src[0]), ""); - } else { - result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp", - ac_to_float_type(&ctx->ac, def_type), src[0]); - } - if (ctx->abi->clamp_div_by_zero) - result = ac_build_fmin(&ctx->ac, result, - LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); - break; - case nir_op_iand: - result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_ior: - result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_ixor: - result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_ishl: - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) - src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], - LLVMTypeOf(src[0]), ""); - else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) - src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], - LLVMTypeOf(src[0]), ""); - result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_ishr: - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) - src[1] = 
LLVMBuildZExt(ctx->ac.builder, src[1], - LLVMTypeOf(src[0]), ""); - else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) - src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], - LLVMTypeOf(src[0]), ""); - result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_ushr: - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) - src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], - LLVMTypeOf(src[0]), ""); - else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) - src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], - LLVMTypeOf(src[0]), ""); - result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], ""); - break; - case nir_op_ilt32: - result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]); - break; - case nir_op_ine32: - result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]); - break; - case nir_op_ieq32: - result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]); - break; - case nir_op_ige32: - result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]); - break; - case nir_op_ult32: - result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]); - break; - case nir_op_uge32: - result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]); - break; - case nir_op_feq32: - result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]); - break; - case nir_op_fne32: - result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]); - break; - case nir_op_flt32: - result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]); - break; - case nir_op_fge32: - result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]); - break; - case nir_op_fabs: - result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs", - ac_to_float_type(&ctx->ac, def_type), src[0]); - if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) { - /* fabs will be optimized by backend compiler with sign - * bit removed via AND. 
- */ - result = ac_build_canonicalize(&ctx->ac, result, - instr->dest.dest.ssa.bit_size); - } - break; - case nir_op_iabs: - result = emit_iabs(&ctx->ac, src[0]); - break; - case nir_op_imax: - result = ac_build_imax(&ctx->ac, src[0], src[1]); - break; - case nir_op_imin: - result = ac_build_imin(&ctx->ac, src[0], src[1]); - break; - case nir_op_umax: - result = ac_build_umax(&ctx->ac, src[0], src[1]); - break; - case nir_op_umin: - result = ac_build_umin(&ctx->ac, src[0], src[1]); - break; - case nir_op_isign: - result = ac_build_isign(&ctx->ac, src[0], - instr->dest.dest.ssa.bit_size); - break; - case nir_op_fsign: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = ac_build_fsign(&ctx->ac, src[0], - instr->dest.dest.ssa.bit_size); - break; - case nir_op_ffloor: - result = emit_intrin_1f_param(&ctx->ac, "llvm.floor", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_ftrunc: - result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_fceil: - result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_fround_even: - result = emit_intrin_1f_param(&ctx->ac, "llvm.rint", - ac_to_float_type(&ctx->ac, def_type),src[0]); - break; - case nir_op_ffract: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = ac_build_fract(&ctx->ac, src[0], - instr->dest.dest.ssa.bit_size); - break; - case nir_op_fsin: - result = emit_intrin_1f_param(&ctx->ac, "llvm.sin", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_fcos: - result = emit_intrin_1f_param(&ctx->ac, "llvm.cos", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_fsqrt: - result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_fexp2: - result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case 
nir_op_flog2: - result = emit_intrin_1f_param(&ctx->ac, "llvm.log2", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_frsq: - result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq", - ac_to_float_type(&ctx->ac, def_type), src[0]); - if (ctx->abi->clamp_div_by_zero) - result = ac_build_fmin(&ctx->ac, result, - LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); - break; - case nir_op_frexp_exp: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = ac_build_frexp_exp(&ctx->ac, src[0], - ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))); - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16) - result = LLVMBuildSExt(ctx->ac.builder, result, - ctx->ac.i32, ""); - break; - case nir_op_frexp_sig: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = ac_build_frexp_mant(&ctx->ac, src[0], - instr->dest.dest.ssa.bit_size); - break; - case nir_op_fpow: - result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); - break; - case nir_op_fmax: - result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); - if (ctx->ac.chip_class < GFX9 && - instr->dest.dest.ssa.bit_size == 32) { - /* Only pre-GFX9 chips do not flush denorms. */ - result = ac_build_canonicalize(&ctx->ac, result, - instr->dest.dest.ssa.bit_size); - } - break; - case nir_op_fmin: - result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); - if (ctx->ac.chip_class < GFX9 && - instr->dest.dest.ssa.bit_size == 32) { - /* Only pre-GFX9 chips do not flush denorms. */ - result = ac_build_canonicalize(&ctx->ac, result, - instr->dest.dest.ssa.bit_size); - } - break; - case nir_op_ffma: - /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */ - result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? 
"llvm.fma" : "llvm.fmuladd", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); - break; - case nir_op_ldexp: - src[0] = ac_to_float(&ctx->ac, src[0]); - if (ac_get_elem_bits(&ctx->ac, def_type) == 32) - result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE); - else if (ac_get_elem_bits(&ctx->ac, def_type) == 16) - result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE); - else - result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE); - break; - case nir_op_bfm: - result = emit_bfm(&ctx->ac, src[0], src[1]); - break; - case nir_op_bitfield_select: - result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]); - break; - case nir_op_ubfe: - result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false); - break; - case nir_op_ibfe: - result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true); - break; - case nir_op_bitfield_reverse: - result = ac_build_bitfield_reverse(&ctx->ac, src[0]); - break; - case nir_op_bit_count: - result = ac_build_bit_count(&ctx->ac, src[0]); - break; - case nir_op_vec2: - case nir_op_vec3: - case nir_op_vec4: - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) - src[i] = ac_to_integer(&ctx->ac, src[i]); - result = ac_build_gather_values(&ctx->ac, src, num_components); - break; - case nir_op_f2i8: - case nir_op_f2i16: - case nir_op_f2i32: - case nir_op_f2i64: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, ""); - break; - case nir_op_f2u8: - case nir_op_f2u16: - case nir_op_f2u32: - case nir_op_f2u64: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, ""); - break; - case nir_op_i2f16: - case nir_op_i2f32: - case nir_op_i2f64: - result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); - break; - case 
nir_op_u2f16: - case nir_op_u2f32: - case nir_op_u2f64: - result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); - break; - case nir_op_f2f16_rtz: - case nir_op_f2f16: - case nir_op_f2fmp: - src[0] = ac_to_float(&ctx->ac, src[0]); - - /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it, - * all f32->f16 conversions have to round towards zero, because both scalar - * and vec2 down-conversions have to round equally. - */ - if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL || - instr->op == nir_op_f2f16_rtz) { - src[0] = ac_to_float(&ctx->ac, src[0]); - - if (LLVMTypeOf(src[0]) == ctx->ac.f64) - src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, ""); - - /* Fast path conversion. This only works if NIR is vectorized - * to vec2 16. - */ - if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) { - LLVMValueRef args[] = { - ac_llvm_extract_elem(&ctx->ac, src[0], 0), - ac_llvm_extract_elem(&ctx->ac, src[0], 1), - }; - result = ac_build_cvt_pkrtz_f16(&ctx->ac, args); - break; - } - - assert(ac_get_llvm_num_components(src[0]) == 1); - LLVMValueRef param[2] = { src[0], LLVMGetUndef(ctx->ac.f32) }; - result = ac_build_cvt_pkrtz_f16(&ctx->ac, param); - result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); - } else { - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) - result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); - else - result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); - } - break; - case nir_op_f2f16_rtne: - case nir_op_f2f32: - case nir_op_f2f64: - src[0] = ac_to_float(&ctx->ac, src[0]); - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) - result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); - else - result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, 
def_type), ""); - break; - case nir_op_u2u8: - case nir_op_u2u16: - case nir_op_u2ump: - case nir_op_u2u32: - case nir_op_u2u64: - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) - result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, ""); - else - result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); - break; - case nir_op_i2i8: - case nir_op_i2i16: - case nir_op_i2imp: - case nir_op_i2i32: - case nir_op_i2i64: - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) - result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, ""); - else - result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); - break; - case nir_op_b32csel: - result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]); - break; - case nir_op_find_lsb: - result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]); - break; - case nir_op_ufind_msb: - result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32); - break; - case nir_op_ifind_msb: - result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32); - break; - case nir_op_uadd_carry: - result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]); - break; - case nir_op_usub_borrow: - result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]); - break; - case nir_op_b2f16: - case nir_op_b2f32: - case nir_op_b2f64: - result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); - break; - case nir_op_f2b32: - result = emit_f2b(&ctx->ac, src[0]); - break; - case nir_op_b2i8: - case nir_op_b2i16: - case nir_op_b2i32: - case nir_op_b2i64: - result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); - break; - case nir_op_i2b32: - result = emit_i2b(&ctx->ac, src[0]); - break; - case nir_op_fquantize2f16: - result = emit_f2f16(&ctx->ac, src[0]); - break; - case nir_op_umul_high: - result = emit_umul_high(&ctx->ac, src[0], src[1]); - break; - case nir_op_imul_high: - result = emit_imul_high(&ctx->ac, src[0], src[1]); 
- break; - case nir_op_pack_half_2x16: - result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16); - break; - case nir_op_pack_snorm_2x16: - result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16); - break; - case nir_op_pack_unorm_2x16: - result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16); - break; - case nir_op_unpack_half_2x16: - result = emit_unpack_half_2x16(&ctx->ac, src[0]); - break; - case nir_op_fddx: - case nir_op_fddy: - case nir_op_fddx_fine: - case nir_op_fddy_fine: - case nir_op_fddx_coarse: - case nir_op_fddy_coarse: - result = emit_ddxy(ctx, instr->op, src[0]); - break; - - case nir_op_unpack_64_2x32_split_x: { - assert(ac_get_llvm_num_components(src[0]) == 1); - LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], - ctx->ac.v2i32, - ""); - result = LLVMBuildExtractElement(ctx->ac.builder, tmp, - ctx->ac.i32_0, ""); - break; - } - - case nir_op_unpack_64_2x32_split_y: { - assert(ac_get_llvm_num_components(src[0]) == 1); - LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], - ctx->ac.v2i32, - ""); - result = LLVMBuildExtractElement(ctx->ac.builder, tmp, - ctx->ac.i32_1, ""); - break; - } - - case nir_op_pack_64_2x32_split: { - LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2); - result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, ""); - break; - } - - case nir_op_pack_32_2x16_split: { - LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2); - result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, ""); - break; - } - - case nir_op_unpack_32_2x16_split_x: { - LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], - ctx->ac.v2i16, - ""); - result = LLVMBuildExtractElement(ctx->ac.builder, tmp, - ctx->ac.i32_0, ""); - break; - } - - case nir_op_unpack_32_2x16_split_y: { - LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], - ctx->ac.v2i16, - ""); - result = LLVMBuildExtractElement(ctx->ac.builder, tmp, - ctx->ac.i32_1, ""); - break; - } - - case 
nir_op_cube_face_coord: { - src[0] = ac_to_float(&ctx->ac, src[0]); - LLVMValueRef results[2]; - LLVMValueRef in[3]; - for (unsigned chan = 0; chan < 3; chan++) - in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); - results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", - ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE); - results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", - ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE); - LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", - ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE); - results[0] = ac_build_fdiv(&ctx->ac, results[0], ma); - results[1] = ac_build_fdiv(&ctx->ac, results[1], ma); - LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5); - results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, ""); - results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, ""); - result = ac_build_gather_values(&ctx->ac, results, 2); - break; - } - - case nir_op_cube_face_index: { - src[0] = ac_to_float(&ctx->ac, src[0]); - LLVMValueRef in[3]; - for (unsigned chan = 0; chan < 3; chan++) - in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); - result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", - ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE); - break; - } - - case nir_op_fmin3: - result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); - result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", - ac_to_float_type(&ctx->ac, def_type), result, src[2]); - break; - case nir_op_umin3: - result = ac_build_umin(&ctx->ac, src[0], src[1]); - result = ac_build_umin(&ctx->ac, result, src[2]); - break; - case nir_op_imin3: - result = ac_build_imin(&ctx->ac, src[0], src[1]); - result = ac_build_imin(&ctx->ac, result, src[2]); - break; - case nir_op_fmax3: - result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); - result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", - 
ac_to_float_type(&ctx->ac, def_type), result, src[2]); - break; - case nir_op_umax3: - result = ac_build_umax(&ctx->ac, src[0], src[1]); - result = ac_build_umax(&ctx->ac, result, src[2]); - break; - case nir_op_imax3: - result = ac_build_imax(&ctx->ac, src[0], src[1]); - result = ac_build_imax(&ctx->ac, result, src[2]); - break; - case nir_op_fmed3: { - src[0] = ac_to_float(&ctx->ac, src[0]); - src[1] = ac_to_float(&ctx->ac, src[1]); - src[2] = ac_to_float(&ctx->ac, src[2]); - result = ac_build_fmed3(&ctx->ac, src[0], src[1], src[2], - instr->dest.dest.ssa.bit_size); - break; - } - case nir_op_imed3: { - LLVMValueRef tmp1 = ac_build_imin(&ctx->ac, src[0], src[1]); - LLVMValueRef tmp2 = ac_build_imax(&ctx->ac, src[0], src[1]); - tmp2 = ac_build_imin(&ctx->ac, tmp2, src[2]); - result = ac_build_imax(&ctx->ac, tmp1, tmp2); - break; - } - case nir_op_umed3: { - LLVMValueRef tmp1 = ac_build_umin(&ctx->ac, src[0], src[1]); - LLVMValueRef tmp2 = ac_build_umax(&ctx->ac, src[0], src[1]); - tmp2 = ac_build_umin(&ctx->ac, tmp2, src[2]); - result = ac_build_umax(&ctx->ac, tmp1, tmp2); - break; - } - - default: - fprintf(stderr, "Unknown NIR alu instr: "); - nir_print_instr(&instr->instr, stderr); - fprintf(stderr, "\n"); - abort(); - } - - if (result) { - assert(instr->dest.dest.is_ssa); - result = ac_to_integer_or_pointer(&ctx->ac, result); - ctx->ssa_defs[instr->dest.dest.ssa.index] = result; - } -} - -static void visit_load_const(struct ac_nir_context *ctx, - const nir_load_const_instr *instr) -{ - LLVMValueRef values[4], value = NULL; - LLVMTypeRef element_type = - LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); - - for (unsigned i = 0; i < instr->def.num_components; ++i) { - switch (instr->def.bit_size) { - case 8: - values[i] = LLVMConstInt(element_type, - instr->value[i].u8, false); - break; - case 16: - values[i] = LLVMConstInt(element_type, - instr->value[i].u16, false); - break; - case 32: - values[i] = LLVMConstInt(element_type, - instr->value[i].u32, 
false); - break; - case 64: - values[i] = LLVMConstInt(element_type, - instr->value[i].u64, false); - break; - default: - fprintf(stderr, - "unsupported nir load_const bit_size: %d\n", - instr->def.bit_size); - abort(); - } - } - if (instr->def.num_components > 1) { - value = LLVMConstVector(values, instr->def.num_components); - } else - value = values[0]; - - ctx->ssa_defs[instr->def.index] = value; -} - -static LLVMValueRef -get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements) -{ - LLVMValueRef size = - LLVMBuildExtractElement(ctx->ac.builder, descriptor, - LLVMConstInt(ctx->ac.i32, 2, false), ""); - - /* GFX8 only */ - if (ctx->ac.chip_class == GFX8 && in_elements) { - /* On GFX8, the descriptor contains the size in bytes, - * but TXQ must return the size in elements. - * The stride is always non-zero for resources using TXQ. - */ - LLVMValueRef stride = - LLVMBuildExtractElement(ctx->ac.builder, descriptor, - ctx->ac.i32_1, ""); - stride = LLVMBuildLShr(ctx->ac.builder, stride, - LLVMConstInt(ctx->ac.i32, 16, false), ""); - stride = LLVMBuildAnd(ctx->ac.builder, stride, - LLVMConstInt(ctx->ac.i32, 0x3fff, false), ""); - - size = LLVMBuildUDiv(ctx->ac.builder, size, stride, ""); - } - return size; + LLVMValueRef src[4], result = NULL; + unsigned num_components = instr->dest.dest.ssa.num_components; + unsigned src_components; + LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa); + + assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src)); + switch (instr->op) { + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + src_components = 1; + break; + case nir_op_pack_half_2x16: + case nir_op_pack_snorm_2x16: + case nir_op_pack_unorm_2x16: + src_components = 2; + break; + case nir_op_unpack_half_2x16: + src_components = 1; + break; + case nir_op_cube_face_coord: + case nir_op_cube_face_index: + src_components = 3; + break; + default: + src_components = num_components; + break; + } + for (unsigned i = 0; i < 
nir_op_infos[instr->op].num_inputs; i++) + src[i] = get_alu_src(ctx, instr->src[i], src_components); + + switch (instr->op) { + case nir_op_mov: + result = src[0]; + break; + case nir_op_fneg: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = LLVMBuildFNeg(ctx->ac.builder, src[0], ""); + if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) { + /* fneg will be optimized by backend compiler with sign + * bit removed via XOR. This is probably a LLVM bug. + */ + result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size); + } + break; + case nir_op_ineg: + result = LLVMBuildNeg(ctx->ac.builder, src[0], ""); + break; + case nir_op_inot: + result = LLVMBuildNot(ctx->ac.builder, src[0], ""); + break; + case nir_op_iadd: + result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_fadd: + src[0] = ac_to_float(&ctx->ac, src[0]); + src[1] = ac_to_float(&ctx->ac, src[1]); + result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_fsub: + src[0] = ac_to_float(&ctx->ac, src[0]); + src[1] = ac_to_float(&ctx->ac, src[1]); + result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_isub: + result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_imul: + result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_imod: + result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_umod: + result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_fmod: + /* lower_fmod only lower 16-bit and 32-bit fmod */ + assert(instr->dest.dest.ssa.bit_size == 64); + src[0] = ac_to_float(&ctx->ac, src[0]); + src[1] = ac_to_float(&ctx->ac, src[1]); + result = ac_build_fdiv(&ctx->ac, src[0], src[1]); + result = + emit_intrin_1f_param(&ctx->ac, "llvm.floor", ac_to_float_type(&ctx->ac, def_type), result); + result = LLVMBuildFMul(ctx->ac.builder, src[1], result, ""); + result = 
LLVMBuildFSub(ctx->ac.builder, src[0], result, ""); + break; + case nir_op_irem: + result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_idiv: + result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_udiv: + result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_fmul: + src[0] = ac_to_float(&ctx->ac, src[0]); + src[1] = ac_to_float(&ctx->ac, src[1]); + result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_frcp: + /* For doubles, we need precise division to pass GLCTS. */ + if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && ac_get_type_size(def_type) == 8) { + result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1, ac_to_float(&ctx->ac, src[0]), ""); + } else { + result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp", + ac_to_float_type(&ctx->ac, def_type), src[0]); + } + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); + break; + case nir_op_iand: + result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_ior: + result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_ixor: + result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_ishl: + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < + ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) + src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), ""); + else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > + ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) + src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), ""); + result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_ishr: + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < + ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) + src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), ""); + else if 
(ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > + ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) + src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), ""); + result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_ushr: + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < + ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) + src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), ""); + else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > + ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))) + src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), ""); + result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], ""); + break; + case nir_op_ilt32: + result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]); + break; + case nir_op_ine32: + result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]); + break; + case nir_op_ieq32: + result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]); + break; + case nir_op_ige32: + result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]); + break; + case nir_op_ult32: + result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]); + break; + case nir_op_uge32: + result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]); + break; + case nir_op_feq32: + result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]); + break; + case nir_op_fne32: + result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]); + break; + case nir_op_flt32: + result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]); + break; + case nir_op_fge32: + result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]); + break; + case nir_op_fabs: + result = + emit_intrin_1f_param(&ctx->ac, "llvm.fabs", ac_to_float_type(&ctx->ac, def_type), src[0]); + if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) { + /* fabs will be optimized by backend compiler with sign + * bit removed via AND. 
+ */ + result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size); + } + break; + case nir_op_iabs: + result = emit_iabs(&ctx->ac, src[0]); + break; + case nir_op_imax: + result = ac_build_imax(&ctx->ac, src[0], src[1]); + break; + case nir_op_imin: + result = ac_build_imin(&ctx->ac, src[0], src[1]); + break; + case nir_op_umax: + result = ac_build_umax(&ctx->ac, src[0], src[1]); + break; + case nir_op_umin: + result = ac_build_umin(&ctx->ac, src[0], src[1]); + break; + case nir_op_isign: + result = ac_build_isign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); + break; + case nir_op_fsign: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = ac_build_fsign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); + break; + case nir_op_ffloor: + result = + emit_intrin_1f_param(&ctx->ac, "llvm.floor", ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_ftrunc: + result = + emit_intrin_1f_param(&ctx->ac, "llvm.trunc", ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_fceil: + result = + emit_intrin_1f_param(&ctx->ac, "llvm.ceil", ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_fround_even: + result = + emit_intrin_1f_param(&ctx->ac, "llvm.rint", ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_ffract: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = ac_build_fract(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); + break; + case nir_op_fsin: + result = + emit_intrin_1f_param(&ctx->ac, "llvm.sin", ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_fcos: + result = + emit_intrin_1f_param(&ctx->ac, "llvm.cos", ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_fsqrt: + result = + emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_fexp2: + result = + emit_intrin_1f_param(&ctx->ac, "llvm.exp2", ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_flog2: + 
result = + emit_intrin_1f_param(&ctx->ac, "llvm.log2", ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_frsq: + result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq", + ac_to_float_type(&ctx->ac, def_type), src[0]); + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); + break; + case nir_op_frexp_exp: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = ac_build_frexp_exp(&ctx->ac, src[0], ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0]))); + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16) + result = LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, ""); + break; + case nir_op_frexp_sig: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); + break; + case nir_op_fpow: + result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type), + src[0], src[1]); + break; + case nir_op_fmax: + result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type), + src[0], src[1]); + if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) { + /* Only pre-GFX9 chips do not flush denorms. */ + result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size); + } + break; + case nir_op_fmin: + result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", ac_to_float_type(&ctx->ac, def_type), + src[0], src[1]); + if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) { + /* Only pre-GFX9 chips do not flush denorms. */ + result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size); + } + break; + case nir_op_ffma: + /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */ + result = + emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? 
"llvm.fma" : "llvm.fmuladd", + ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); + break; + case nir_op_ldexp: + src[0] = ac_to_float(&ctx->ac, src[0]); + if (ac_get_elem_bits(&ctx->ac, def_type) == 32) + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, + AC_FUNC_ATTR_READNONE); + else if (ac_get_elem_bits(&ctx->ac, def_type) == 16) + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, + AC_FUNC_ATTR_READNONE); + else + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, + AC_FUNC_ATTR_READNONE); + break; + case nir_op_bfm: + result = emit_bfm(&ctx->ac, src[0], src[1]); + break; + case nir_op_bitfield_select: + result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]); + break; + case nir_op_ubfe: + result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false); + break; + case nir_op_ibfe: + result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true); + break; + case nir_op_bitfield_reverse: + result = ac_build_bitfield_reverse(&ctx->ac, src[0]); + break; + case nir_op_bit_count: + result = ac_build_bit_count(&ctx->ac, src[0]); + break; + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + src[i] = ac_to_integer(&ctx->ac, src[i]); + result = ac_build_gather_values(&ctx->ac, src, num_components); + break; + case nir_op_f2i8: + case nir_op_f2i16: + case nir_op_f2i32: + case nir_op_f2i64: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, ""); + break; + case nir_op_f2u8: + case nir_op_f2u16: + case nir_op_f2u32: + case nir_op_f2u64: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, ""); + break; + case nir_op_i2f16: + case nir_op_i2f32: + case nir_op_i2f64: + result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + break; + case 
nir_op_u2f16: + case nir_op_u2f32: + case nir_op_u2f64: + result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + break; + case nir_op_f2f16_rtz: + case nir_op_f2f16: + case nir_op_f2fmp: + src[0] = ac_to_float(&ctx->ac, src[0]); + + /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it, + * all f32->f16 conversions have to round towards zero, because both scalar + * and vec2 down-conversions have to round equally. + */ + if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) { + src[0] = ac_to_float(&ctx->ac, src[0]); + + if (LLVMTypeOf(src[0]) == ctx->ac.f64) + src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, ""); + + /* Fast path conversion. This only works if NIR is vectorized + * to vec2 16. + */ + if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) { + LLVMValueRef args[] = { + ac_llvm_extract_elem(&ctx->ac, src[0], 0), + ac_llvm_extract_elem(&ctx->ac, src[0], 1), + }; + result = ac_build_cvt_pkrtz_f16(&ctx->ac, args); + break; + } + + assert(ac_get_llvm_num_components(src[0]) == 1); + LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->ac.f32)}; + result = ac_build_cvt_pkrtz_f16(&ctx->ac, param); + result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); + } else { + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) + result = + LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + else + result = + LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + } + break; + case nir_op_f2f16_rtne: + case nir_op_f2f32: + case nir_op_f2f64: + src[0] = ac_to_float(&ctx->ac, src[0]); + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) + result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + else + result = + LLVMBuildFPTrunc(ctx->ac.builder, src[0], 
ac_to_float_type(&ctx->ac, def_type), ""); + break; + case nir_op_u2u8: + case nir_op_u2u16: + case nir_op_u2ump: + case nir_op_u2u32: + case nir_op_u2u64: + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) + result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, ""); + else + result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); + break; + case nir_op_i2i8: + case nir_op_i2i16: + case nir_op_i2imp: + case nir_op_i2i32: + case nir_op_i2i64: + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) + result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, ""); + else + result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); + break; + case nir_op_b32csel: + result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]); + break; + case nir_op_find_lsb: + result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]); + break; + case nir_op_ufind_msb: + result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32); + break; + case nir_op_ifind_msb: + result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32); + break; + case nir_op_uadd_carry: + result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]); + break; + case nir_op_usub_borrow: + result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]); + break; + case nir_op_b2f16: + case nir_op_b2f32: + case nir_op_b2f64: + result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); + break; + case nir_op_f2b32: + result = emit_f2b(&ctx->ac, src[0]); + break; + case nir_op_b2i8: + case nir_op_b2i16: + case nir_op_b2i32: + case nir_op_b2i64: + result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); + break; + case nir_op_i2b32: + result = emit_i2b(&ctx->ac, src[0]); + break; + case nir_op_fquantize2f16: + result = emit_f2f16(&ctx->ac, src[0]); + break; + case nir_op_umul_high: + result = emit_umul_high(&ctx->ac, src[0], src[1]); + break; + case nir_op_imul_high: + result = 
emit_imul_high(&ctx->ac, src[0], src[1]); + break; + case nir_op_pack_half_2x16: + result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16); + break; + case nir_op_pack_snorm_2x16: + result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16); + break; + case nir_op_pack_unorm_2x16: + result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16); + break; + case nir_op_unpack_half_2x16: + result = emit_unpack_half_2x16(&ctx->ac, src[0]); + break; + case nir_op_fddx: + case nir_op_fddy: + case nir_op_fddx_fine: + case nir_op_fddy_fine: + case nir_op_fddx_coarse: + case nir_op_fddy_coarse: + result = emit_ddxy(ctx, instr->op, src[0]); + break; + + case nir_op_unpack_64_2x32_split_x: { + assert(ac_get_llvm_num_components(src[0]) == 1); + LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, ""); + result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, ""); + break; + } + + case nir_op_unpack_64_2x32_split_y: { + assert(ac_get_llvm_num_components(src[0]) == 1); + LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, ""); + result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, ""); + break; + } + + case nir_op_pack_64_2x32_split: { + LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2); + result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, ""); + break; + } + + case nir_op_pack_32_2x16_split: { + LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2); + result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, ""); + break; + } + + case nir_op_unpack_32_2x16_split_x: { + LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, ""); + result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, ""); + break; + } + + case nir_op_unpack_32_2x16_split_y: { + LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, ""); + result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, ""); + break; + } + 
+ case nir_op_cube_face_coord: { + src[0] = ac_to_float(&ctx->ac, src[0]); + LLVMValueRef results[2]; + LLVMValueRef in[3]; + for (unsigned chan = 0; chan < 3; chan++) + in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); + results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3, + AC_FUNC_ATTR_READNONE); + results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3, + AC_FUNC_ATTR_READNONE); + LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3, + AC_FUNC_ATTR_READNONE); + results[0] = ac_build_fdiv(&ctx->ac, results[0], ma); + results[1] = ac_build_fdiv(&ctx->ac, results[1], ma); + LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5); + results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, ""); + results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, ""); + result = ac_build_gather_values(&ctx->ac, results, 2); + break; + } + + case nir_op_cube_face_index: { + src[0] = ac_to_float(&ctx->ac, src[0]); + LLVMValueRef in[3]; + for (unsigned chan = 0; chan < 3; chan++) + in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", ctx->ac.f32, in, 3, + AC_FUNC_ATTR_READNONE); + break; + } + + case nir_op_fmin3: + result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", ac_to_float_type(&ctx->ac, def_type), + src[0], src[1]); + result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", ac_to_float_type(&ctx->ac, def_type), + result, src[2]); + break; + case nir_op_umin3: + result = ac_build_umin(&ctx->ac, src[0], src[1]); + result = ac_build_umin(&ctx->ac, result, src[2]); + break; + case nir_op_imin3: + result = ac_build_imin(&ctx->ac, src[0], src[1]); + result = ac_build_imin(&ctx->ac, result, src[2]); + break; + case nir_op_fmax3: + result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type), + src[0], src[1]); + result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", 
ac_to_float_type(&ctx->ac, def_type), + result, src[2]); + break; + case nir_op_umax3: + result = ac_build_umax(&ctx->ac, src[0], src[1]); + result = ac_build_umax(&ctx->ac, result, src[2]); + break; + case nir_op_imax3: + result = ac_build_imax(&ctx->ac, src[0], src[1]); + result = ac_build_imax(&ctx->ac, result, src[2]); + break; + case nir_op_fmed3: { + src[0] = ac_to_float(&ctx->ac, src[0]); + src[1] = ac_to_float(&ctx->ac, src[1]); + src[2] = ac_to_float(&ctx->ac, src[2]); + result = ac_build_fmed3(&ctx->ac, src[0], src[1], src[2], instr->dest.dest.ssa.bit_size); + break; + } + case nir_op_imed3: { + LLVMValueRef tmp1 = ac_build_imin(&ctx->ac, src[0], src[1]); + LLVMValueRef tmp2 = ac_build_imax(&ctx->ac, src[0], src[1]); + tmp2 = ac_build_imin(&ctx->ac, tmp2, src[2]); + result = ac_build_imax(&ctx->ac, tmp1, tmp2); + break; + } + case nir_op_umed3: { + LLVMValueRef tmp1 = ac_build_umin(&ctx->ac, src[0], src[1]); + LLVMValueRef tmp2 = ac_build_umax(&ctx->ac, src[0], src[1]); + tmp2 = ac_build_umin(&ctx->ac, tmp2, src[2]); + result = ac_build_umax(&ctx->ac, tmp1, tmp2); + break; + } + + default: + fprintf(stderr, "Unknown NIR alu instr: "); + nir_print_instr(&instr->instr, stderr); + fprintf(stderr, "\n"); + abort(); + } + + if (result) { + assert(instr->dest.dest.is_ssa); + result = ac_to_integer_or_pointer(&ctx->ac, result); + ctx->ssa_defs[instr->dest.dest.ssa.index] = result; + } +} + +static void visit_load_const(struct ac_nir_context *ctx, const nir_load_const_instr *instr) +{ + LLVMValueRef values[4], value = NULL; + LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); + + for (unsigned i = 0; i < instr->def.num_components; ++i) { + switch (instr->def.bit_size) { + case 8: + values[i] = LLVMConstInt(element_type, instr->value[i].u8, false); + break; + case 16: + values[i] = LLVMConstInt(element_type, instr->value[i].u16, false); + break; + case 32: + values[i] = LLVMConstInt(element_type, instr->value[i].u32, false); + 
break; + case 64: + values[i] = LLVMConstInt(element_type, instr->value[i].u64, false); + break; + default: + fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size); + abort(); + } + } + if (instr->def.num_components > 1) { + value = LLVMConstVector(values, instr->def.num_components); + } else + value = values[0]; + + ctx->ssa_defs[instr->def.index] = value; +} + +static LLVMValueRef get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, + bool in_elements) +{ + LLVMValueRef size = + LLVMBuildExtractElement(ctx->ac.builder, descriptor, LLVMConstInt(ctx->ac.i32, 2, false), ""); + + /* GFX8 only */ + if (ctx->ac.chip_class == GFX8 && in_elements) { + /* On GFX8, the descriptor contains the size in bytes, + * but TXQ must return the size in elements. + * The stride is always non-zero for resources using TXQ. + */ + LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, descriptor, ctx->ac.i32_1, ""); + stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, false), ""); + stride = LLVMBuildAnd(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 0x3fff, false), ""); + + size = LLVMBuildUDiv(ctx->ac.builder, size, stride, ""); + } + return size; } /* Gather4 should follow the same rules as bilinear filtering, but the hardware @@ -1322,3082 +1238,2855 @@ * runtime. In this case, return an i1 value that indicates whether the * descriptor was overridden (and hence a fixup of the sampler result is needed). 
*/ -static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, - nir_variable *var, - struct ac_image_args *args, - const nir_tex_instr *instr) -{ - const struct glsl_type *type = glsl_without_array(var->type); - enum glsl_base_type stype = glsl_get_sampler_result_type(type); - LLVMValueRef wa_8888 = NULL; - LLVMValueRef half_texel[2]; - LLVMValueRef result; - - assert(stype == GLSL_TYPE_INT || stype == GLSL_TYPE_UINT); - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - LLVMValueRef formats; - LLVMValueRef data_format; - LLVMValueRef wa_formats; - - formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, ""); - - data_format = LLVMBuildLShr(ctx->builder, formats, - LLVMConstInt(ctx->i32, 20, false), ""); - data_format = LLVMBuildAnd(ctx->builder, data_format, - LLVMConstInt(ctx->i32, (1u << 6) - 1, false), ""); - wa_8888 = LLVMBuildICmp( - ctx->builder, LLVMIntEQ, data_format, - LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), - ""); - - uint32_t wa_num_format = - stype == GLSL_TYPE_UINT ? 
- S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) : - S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED); - wa_formats = LLVMBuildAnd(ctx->builder, formats, - LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), - ""); - wa_formats = LLVMBuildOr(ctx->builder, wa_formats, - LLVMConstInt(ctx->i32, wa_num_format, false), ""); - - formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, ""); - args->resource = LLVMBuildInsertElement( - ctx->builder, args->resource, formats, ctx->i32_1, ""); - } - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) { - assert(!wa_8888); - half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5); - } else { - struct ac_image_args resinfo = {}; - LLVMBasicBlockRef bbs[2]; - - LLVMValueRef unnorm = NULL; - LLVMValueRef default_offset = ctx->f32_0; - if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D && - !instr->is_array) { - /* In vulkan, whether the sampler uses unnormalized - * coordinates or not is a dynamic property of the - * sampler. Hence, to figure out whether or not we - * need to divide by the texture size, we need to test - * the sampler at runtime. This tests the bit set by - * radv_init_sampler(). - */ - LLVMValueRef sampler0 = - LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, ""); - sampler0 = LLVMBuildLShr(ctx->builder, sampler0, - LLVMConstInt(ctx->i32, 15, false), ""); - sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, ""); - unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, ""); - default_offset = LLVMConstReal(ctx->f32, -0.5); - } - - bbs[0] = LLVMGetInsertBlock(ctx->builder); - if (wa_8888 || unnorm) { - assert(!(wa_8888 && unnorm)); - LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm; - /* Skip the texture size query entirely if we don't need it. */ - ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000); - bbs[1] = LLVMGetInsertBlock(ctx->builder); - } - - /* Query the texture size. 
*/ - resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array); - resinfo.opcode = ac_image_get_resinfo; - resinfo.dmask = 0xf; - resinfo.lod = ctx->i32_0; - resinfo.resource = args->resource; - resinfo.attributes = AC_FUNC_ATTR_READNONE; - LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo); - - /* Compute -0.5 / size. */ - for (unsigned c = 0; c < 2; c++) { - half_texel[c] = - LLVMBuildExtractElement(ctx->builder, size, - LLVMConstInt(ctx->i32, c, 0), ""); - half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); - half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]); - half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], - LLVMConstReal(ctx->f32, -0.5), ""); - } - - if (wa_8888 || unnorm) { - ac_build_endif(ctx, 2000); - - for (unsigned c = 0; c < 2; c++) { - LLVMValueRef values[2] = { default_offset, half_texel[c] }; - half_texel[c] = ac_build_phi(ctx, ctx->f32, 2, - values, bbs); - } - } - } - - for (unsigned c = 0; c < 2; c++) { - LLVMValueRef tmp; - tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, ""); - args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); - } - - args->attributes = AC_FUNC_ATTR_READNONE; - result = ac_build_image_opcode(ctx, args); - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - LLVMValueRef tmp, tmp2; - - /* if the cube workaround is in place, f2i the result. 
*/ - for (unsigned c = 0; c < 4; c++) { - tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), ""); - if (stype == GLSL_TYPE_UINT) - tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, ""); - else - tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); - tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, ""); - tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); - result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), ""); - } - } - return result; +static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, nir_variable *var, + struct ac_image_args *args, const nir_tex_instr *instr) +{ + const struct glsl_type *type = glsl_without_array(var->type); + enum glsl_base_type stype = glsl_get_sampler_result_type(type); + LLVMValueRef wa_8888 = NULL; + LLVMValueRef half_texel[2]; + LLVMValueRef result; + + assert(stype == GLSL_TYPE_INT || stype == GLSL_TYPE_UINT); + + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + LLVMValueRef formats; + LLVMValueRef data_format; + LLVMValueRef wa_formats; + + formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, ""); + + data_format = LLVMBuildLShr(ctx->builder, formats, LLVMConstInt(ctx->i32, 20, false), ""); + data_format = + LLVMBuildAnd(ctx->builder, data_format, LLVMConstInt(ctx->i32, (1u << 6) - 1, false), ""); + wa_8888 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, data_format, + LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), ""); + + uint32_t wa_num_format = stype == GLSL_TYPE_UINT + ? 
S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) + : S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED); + wa_formats = LLVMBuildAnd(ctx->builder, formats, + LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), ""); + wa_formats = + LLVMBuildOr(ctx->builder, wa_formats, LLVMConstInt(ctx->i32, wa_num_format, false), ""); + + formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, ""); + args->resource = + LLVMBuildInsertElement(ctx->builder, args->resource, formats, ctx->i32_1, ""); + } + + if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) { + assert(!wa_8888); + half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5); + } else { + struct ac_image_args resinfo = {}; + LLVMBasicBlockRef bbs[2]; + + LLVMValueRef unnorm = NULL; + LLVMValueRef default_offset = ctx->f32_0; + if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D && !instr->is_array) { + /* In vulkan, whether the sampler uses unnormalized + * coordinates or not is a dynamic property of the + * sampler. Hence, to figure out whether or not we + * need to divide by the texture size, we need to test + * the sampler at runtime. This tests the bit set by + * radv_init_sampler(). + */ + LLVMValueRef sampler0 = + LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, ""); + sampler0 = LLVMBuildLShr(ctx->builder, sampler0, LLVMConstInt(ctx->i32, 15, false), ""); + sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, ""); + unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, ""); + default_offset = LLVMConstReal(ctx->f32, -0.5); + } + + bbs[0] = LLVMGetInsertBlock(ctx->builder); + if (wa_8888 || unnorm) { + assert(!(wa_8888 && unnorm)); + LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm; + /* Skip the texture size query entirely if we don't need it. */ + ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000); + bbs[1] = LLVMGetInsertBlock(ctx->builder); + } + + /* Query the texture size. 
*/ + resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array); + resinfo.opcode = ac_image_get_resinfo; + resinfo.dmask = 0xf; + resinfo.lod = ctx->i32_0; + resinfo.resource = args->resource; + resinfo.attributes = AC_FUNC_ATTR_READNONE; + LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo); + + /* Compute -0.5 / size. */ + for (unsigned c = 0; c < 2; c++) { + half_texel[c] = + LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, 0), ""); + half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); + half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]); + half_texel[c] = + LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), ""); + } + + if (wa_8888 || unnorm) { + ac_build_endif(ctx, 2000); + + for (unsigned c = 0; c < 2; c++) { + LLVMValueRef values[2] = {default_offset, half_texel[c]}; + half_texel[c] = ac_build_phi(ctx, ctx->f32, 2, values, bbs); + } + } + } + + for (unsigned c = 0; c < 2; c++) { + LLVMValueRef tmp; + tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, ""); + args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); + } + + args->attributes = AC_FUNC_ATTR_READNONE; + result = ac_build_image_opcode(ctx, args); + + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + LLVMValueRef tmp, tmp2; + + /* if the cube workaround is in place, f2i the result. 
*/ + for (unsigned c = 0; c < 4; c++) { + tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), ""); + if (stype == GLSL_TYPE_UINT) + tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, ""); + else + tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, ""); + tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); + tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, ""); + tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, ""); + tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); + result = + LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), ""); + } + } + return result; } static nir_deref_instr *get_tex_texture_deref(const nir_tex_instr *instr) { - nir_deref_instr *texture_deref_instr = NULL; + nir_deref_instr *texture_deref_instr = NULL; - for (unsigned i = 0; i < instr->num_srcs; i++) { - switch (instr->src[i].src_type) { - case nir_tex_src_texture_deref: - texture_deref_instr = nir_src_as_deref(instr->src[i].src); - break; - default: - break; - } - } - return texture_deref_instr; -} - -static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, - const nir_tex_instr *instr, - struct ac_image_args *args) -{ - if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); - - assert(instr->dest.is_ssa); - return ac_build_buffer_load_format(&ctx->ac, - args->resource, - args->coords[0], - ctx->ac.i32_0, - util_last_bit(mask), - 0, true, - instr->dest.ssa.bit_size == 16); - } - - args->opcode = ac_image_sample; - - switch (instr->op) { - case nir_texop_txf: - case nir_texop_txf_ms: - case nir_texop_samples_identical: - args->opcode = args->level_zero || - instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? 
- ac_image_load : ac_image_load_mip; - args->level_zero = false; - break; - case nir_texop_txs: - case nir_texop_query_levels: - args->opcode = ac_image_get_resinfo; - if (!args->lod) - args->lod = ctx->ac.i32_0; - args->level_zero = false; - break; - case nir_texop_tex: - if (ctx->stage != MESA_SHADER_FRAGMENT) { - assert(!args->lod); - args->level_zero = true; - } - break; - case nir_texop_tg4: - args->opcode = ac_image_gather4; - if (!args->lod && !args->bias) - args->level_zero = true; - break; - case nir_texop_lod: - args->opcode = ac_image_get_lod; - break; - case nir_texop_fragment_fetch: - case nir_texop_fragment_mask_fetch: - args->opcode = ac_image_load; - args->level_zero = false; - break; - default: - break; - } - - if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) { - nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr); - nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr); - const struct glsl_type *type = glsl_without_array(var->type); - enum glsl_base_type stype = glsl_get_sampler_result_type(type); - if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { - return lower_gather4_integer(&ctx->ac, var, args, instr); - } - } - - /* Fixup for GFX9 which allocates 1D textures as 2D. */ - if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) { - if ((args->dim == ac_image_2darray || - args->dim == ac_image_2d) && !args->coords[1]) { - args->coords[1] = ctx->ac.i32_0; - } - } - - args->attributes = AC_FUNC_ATTR_READNONE; - bool cs_derivs = ctx->stage == MESA_SHADER_COMPUTE && - ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE; - if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) { - /* Prevent texture instructions with implicit derivatives from being - * sinked into branches. 
*/ - switch (instr->op) { - case nir_texop_tex: - case nir_texop_txb: - case nir_texop_lod: - args->attributes |= AC_FUNC_ATTR_CONVERGENT; - break; - default: - break; - } - } + for (unsigned i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_texture_deref: + texture_deref_instr = nir_src_as_deref(instr->src[i].src); + break; + default: + break; + } + } + return texture_deref_instr; +} + +static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_tex_instr *instr, + struct ac_image_args *args) +{ + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); + + assert(instr->dest.is_ssa); + return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0, + util_last_bit(mask), 0, true, + instr->dest.ssa.bit_size == 16); + } + + args->opcode = ac_image_sample; + + switch (instr->op) { + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_samples_identical: + args->opcode = args->level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS + ? 
ac_image_load + : ac_image_load_mip; + args->level_zero = false; + break; + case nir_texop_txs: + case nir_texop_query_levels: + args->opcode = ac_image_get_resinfo; + if (!args->lod) + args->lod = ctx->ac.i32_0; + args->level_zero = false; + break; + case nir_texop_tex: + if (ctx->stage != MESA_SHADER_FRAGMENT) { + assert(!args->lod); + args->level_zero = true; + } + break; + case nir_texop_tg4: + args->opcode = ac_image_gather4; + if (!args->lod && !args->bias) + args->level_zero = true; + break; + case nir_texop_lod: + args->opcode = ac_image_get_lod; + break; + case nir_texop_fragment_fetch: + case nir_texop_fragment_mask_fetch: + args->opcode = ac_image_load; + args->level_zero = false; + break; + default: + break; + } + + if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) { + nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr); + nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr); + const struct glsl_type *type = glsl_without_array(var->type); + enum glsl_base_type stype = glsl_get_sampler_result_type(type); + if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { + return lower_gather4_integer(&ctx->ac, var, args, instr); + } + } + + /* Fixup for GFX9 which allocates 1D textures as 2D. */ + if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) { + if ((args->dim == ac_image_2darray || args->dim == ac_image_2d) && !args->coords[1]) { + args->coords[1] = ctx->ac.i32_0; + } + } + + args->attributes = AC_FUNC_ATTR_READNONE; + bool cs_derivs = + ctx->stage == MESA_SHADER_COMPUTE && ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE; + if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) { + /* Prevent texture instructions with implicit derivatives from being + * sinked into branches. 
*/ + switch (instr->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_lod: + args->attributes |= AC_FUNC_ATTR_CONVERGENT; + break; + default: + break; + } + } - return ac_build_image_opcode(&ctx->ac, args); + return ac_build_image_opcode(&ctx->ac, args); } static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { - LLVMValueRef ptr = get_src(ctx, instr->src[0]); - LLVMValueRef index = get_src(ctx, instr->src[1]); + LLVMValueRef ptr = get_src(ctx, instr->src[0]); + LLVMValueRef index = get_src(ctx, instr->src[1]); - LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, ""); - LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md); - return result; -} - -static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - LLVMValueRef ptr, addr; - LLVMValueRef src0 = get_src(ctx, instr->src[0]); - unsigned index = nir_intrinsic_base(instr); - - addr = LLVMConstInt(ctx->ac.i32, index, 0); - addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, ""); - - /* Load constant values from user SGPRS when possible, otherwise - * fallback to the default path that loads directly from memory. 
- */ - if (LLVMIsConstant(src0) && - instr->dest.ssa.bit_size == 32) { - unsigned count = instr->dest.ssa.num_components; - unsigned offset = index; - - offset += LLVMConstIntGetZExtValue(src0); - offset /= 4; - - offset -= ctx->args->base_inline_push_consts; - - unsigned num_inline_push_consts = ctx->args->num_inline_push_consts; - if (offset + count <= num_inline_push_consts) { - LLVMValueRef push_constants[num_inline_push_consts]; - for (unsigned i = 0; i < num_inline_push_consts; i++) - push_constants[i] = ac_get_arg(&ctx->ac, - ctx->args->inline_push_consts[i]); - return ac_build_gather_values(&ctx->ac, - push_constants + offset, - count); - } - } - - ptr = LLVMBuildGEP(ctx->ac.builder, - ac_get_arg(&ctx->ac, ctx->args->push_constants), &addr, 1, ""); - - if (instr->dest.ssa.bit_size == 8) { - unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1; - LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords); - ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type); - LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, ""); - - LLVMValueRef params[3]; - if (load_dwords > 1) { - LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, ""); - params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), ""); - params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), ""); - } else { - res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, ""); - params[0] = ctx->ac.i32_0; - params[1] = res; - } - params[2] = addr; - res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0); - - res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), ""); - if (instr->dest.ssa.num_components > 1) - res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), ""); - return res; - } else if (instr->dest.ssa.bit_size == 16) { - unsigned 
load_dwords = instr->dest.ssa.num_components / 2 + 1; - LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords); - ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type); - LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, ""); - res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, ""); - LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, ""); - cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, ""); - LLVMValueRef mask[] = { LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), - LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), - LLVMConstInt(ctx->ac.i32, 4, false)}; - LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components); - LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components); - LLVMValueRef shuffle_aligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, ""); - LLVMValueRef shuffle_unaligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, ""); - res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, ""); - return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), ""); - } + LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, ""); + LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md); + return result; +} + +static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) +{ + LLVMValueRef ptr, addr; + LLVMValueRef src0 = get_src(ctx, instr->src[0]); + unsigned index = nir_intrinsic_base(instr); + + addr = LLVMConstInt(ctx->ac.i32, index, 0); + addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, ""); + + /* Load constant values from user SGPRS when possible, otherwise + * fallback to the default path that loads directly from memory. 
+ */ + if (LLVMIsConstant(src0) && instr->dest.ssa.bit_size == 32) { + unsigned count = instr->dest.ssa.num_components; + unsigned offset = index; + + offset += LLVMConstIntGetZExtValue(src0); + offset /= 4; + + offset -= ctx->args->base_inline_push_consts; + + unsigned num_inline_push_consts = ctx->args->num_inline_push_consts; + if (offset + count <= num_inline_push_consts) { + LLVMValueRef push_constants[num_inline_push_consts]; + for (unsigned i = 0; i < num_inline_push_consts; i++) + push_constants[i] = ac_get_arg(&ctx->ac, ctx->args->inline_push_consts[i]); + return ac_build_gather_values(&ctx->ac, push_constants + offset, count); + } + } + + ptr = + LLVMBuildGEP(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->push_constants), &addr, 1, ""); + + if (instr->dest.ssa.bit_size == 8) { + unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1; + LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords); + ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type); + LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, ""); + + LLVMValueRef params[3]; + if (load_dwords > 1) { + LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, ""); + params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, + LLVMConstInt(ctx->ac.i32, 1, false), ""); + params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, + LLVMConstInt(ctx->ac.i32, 0, false), ""); + } else { + res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, ""); + params[0] = ctx->ac.i32_0; + params[1] = res; + } + params[2] = addr; + res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0); + + res = LLVMBuildTrunc( + ctx->ac.builder, res, + LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), ""); + if (instr->dest.ssa.num_components > 1) + res = LLVMBuildBitCast(ctx->ac.builder, res, + LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), ""); + return res; + } else if (instr->dest.ssa.bit_size == 16) { + unsigned 
load_dwords = instr->dest.ssa.num_components / 2 + 1; + LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords); + ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type); + LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, ""); + res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, ""); + LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, ""); + cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, ""); + LLVMValueRef mask[] = { + LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), + LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), + LLVMConstInt(ctx->ac.i32, 4, false)}; + LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components); + LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components); + LLVMValueRef shuffle_aligned = + LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, ""); + LLVMValueRef shuffle_unaligned = + LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, ""); + res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, ""); + return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), ""); + } - ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa)); + ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa)); - return LLVMBuildLoad(ctx->ac.builder, ptr, ""); + return LLVMBuildLoad(ctx->ac.builder, ptr, ""); } static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { - LLVMValueRef index = get_src(ctx, instr->src[0]); + LLVMValueRef index = get_src(ctx, instr->src[0]); - return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false); + return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false); } static uint32_t widen_mask(uint32_t mask, unsigned multiplier) { - uint32_t new_mask = 0; - for(unsigned i = 0; i < 32 && (1u << i) 
<= mask; ++i) - if (mask & (1u << i)) - new_mask |= ((1u << multiplier) - 1u) << (i * multiplier); - return new_mask; + uint32_t new_mask = 0; + for (unsigned i = 0; i < 32 && (1u << i) <= mask; ++i) + if (mask & (1u << i)) + new_mask |= ((1u << multiplier) - 1u) << (i * multiplier); + return new_mask; } static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned start, unsigned count) { - LLVMValueRef mask[] = { - ctx->i32_0, ctx->i32_1, - LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false) }; - - unsigned src_elements = ac_get_llvm_num_components(src); - - if (count == src_elements) { - assert(start == 0); - return src; - } else if (count == 1) { - assert(start < src_elements); - return LLVMBuildExtractElement(ctx->builder, src, mask[start], ""); - } else { - assert(start + count <= src_elements); - assert(count <= 4); - LLVMValueRef swizzle = LLVMConstVector(&mask[start], count); - return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, ""); - } -} - -static unsigned get_cache_policy(struct ac_nir_context *ctx, - enum gl_access_qualifier access, - bool may_store_unaligned, - bool writeonly_memory) -{ - unsigned cache_policy = 0; - - /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All - * store opcodes not aligned to a dword are affected. The only way to - * get unaligned stores is through shader images. - */ - if (((may_store_unaligned && ctx->ac.chip_class == GFX6) || - /* If this is write-only, don't keep data in L1 to prevent - * evicting L1 cache lines that may be needed by other - * instructions. 
- */ - writeonly_memory || - access & (ACCESS_COHERENT | ACCESS_VOLATILE))) { - cache_policy |= ac_glc; - } - - if (access & ACCESS_STREAM_CACHE_POLICY) - cache_policy |= ac_slc | ac_glc; - - return cache_policy; -} - -static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx, - struct waterfall_context *wctx, - const nir_intrinsic_instr *instr, - nir_src src) -{ - return enter_waterfall(ctx, wctx, get_src(ctx, src), - nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM); -} - -static void visit_store_ssbo(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, - ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7000); - } - - LLVMValueRef src_data = get_src(ctx, instr->src[0]); - int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8; - unsigned writemask = nir_intrinsic_write_mask(instr); - enum gl_access_qualifier access = nir_intrinsic_access(instr); - bool writeonly_memory = access & ACCESS_NON_READABLE; - unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory); - - struct waterfall_context wctx; - LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]); - - LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true); - LLVMValueRef base_data = src_data; - base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components); - LLVMValueRef base_offset = get_src(ctx, instr->src[2]); - - while (writemask) { - int start, count; - LLVMValueRef data, offset; - LLVMTypeRef data_type; - - u_bit_scan_consecutive_range(&writemask, &start, &count); - - /* Due to an LLVM limitation with LLVM < 9, split 3-element - * writes into a 2-element and a 1-element write. 
*/ - if (count == 3 && - (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) { - writemask |= 1 << (start + 2); - count = 2; - } - int num_bytes = count * elem_size_bytes; /* count in bytes */ - - /* we can only store 4 DWords at the same time. - * can only happen for 64 Bit vectors. */ - if (num_bytes > 16) { - writemask |= ((1u << (count - 2)) - 1u) << (start + 2); - count = 2; - num_bytes = 16; - } - - /* check alignment of 16 Bit stores */ - if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) { - writemask |= ((1u << (count - 1)) - 1u) << (start + 1); - count = 1; - num_bytes = 2; - } - - /* Due to alignment issues, split stores of 8-bit/16-bit - * vectors. - */ - if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) { - writemask |= ((1u << (count - 1)) - 1u) << (start + 1); - count = 1; - num_bytes = elem_size_bytes; - } - - data = extract_vector_range(&ctx->ac, base_data, start, count); - - offset = LLVMBuildAdd(ctx->ac.builder, base_offset, - LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), ""); - - if (num_bytes == 1) { - ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data, - offset, ctx->ac.i32_0, - cache_policy); - } else if (num_bytes == 2) { - ac_build_tbuffer_store_short(&ctx->ac, rsrc, data, - offset, ctx->ac.i32_0, - cache_policy); - } else { - int num_channels = num_bytes / 4; - - switch (num_bytes) { - case 16: /* v4f32 */ - data_type = ctx->ac.v4f32; - break; - case 12: /* v3f32 */ - data_type = ctx->ac.v3f32; - break; - case 8: /* v2f32 */ - data_type = ctx->ac.v2f32; - break; - case 4: /* f32 */ - data_type = ctx->ac.f32; - break; - default: - unreachable("Malformed vector store."); - } - data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, ""); - - ac_build_buffer_store_dword(&ctx->ac, rsrc, data, - num_channels, offset, - ctx->ac.i32_0, 0, - cache_policy); - } - } - - exit_waterfall(ctx, &wctx, NULL); - - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7000); -} - -static 
LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx, - LLVMValueRef descriptor, - LLVMValueRef offset, - LLVMValueRef compare, - LLVMValueRef exchange) -{ - LLVMBasicBlockRef start_block = NULL, then_block = NULL; - if (ctx->abi->robust_buffer_access) { - LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2); - - LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, ""); - start_block = LLVMGetInsertBlock(ctx->ac.builder); - - ac_build_ifcc(&ctx->ac, cond, -1); - - then_block = LLVMGetInsertBlock(ctx->ac.builder); - } - - LLVMValueRef ptr_parts[2] = { - ac_llvm_extract_elem(&ctx->ac, descriptor, 0), - LLVMBuildAnd(ctx->ac.builder, - ac_llvm_extract_elem(&ctx->ac, descriptor, 1), - LLVMConstInt(ctx->ac.i32, 65535, 0), "") - }; - - ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, ""); - ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, ""); - - offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, ""); - - LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2); - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, ""); - ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, ""); - ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL), ""); - - LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as"); - result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, ""); - - if (ctx->abi->robust_buffer_access) { - ac_build_endif(&ctx->ac, -1); - - LLVMBasicBlockRef incoming_blocks[2] = { - start_block, - then_block, - }; - - LLVMValueRef incoming_values[2] = { - LLVMConstInt(ctx->ac.i64, 0, 0), - result, - }; - LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, ""); - LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2); - return ret; - } else { - return result; - } -} - -static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, - nir_intrinsic_instr 
*instr) -{ - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, - ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7001); - } - - LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2])); - const char *op; - char name[64], type[8]; - LLVMValueRef params[6], descriptor; - LLVMValueRef result; - int arg_count = 0; - - struct waterfall_context wctx; - LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]); - - switch (instr->intrinsic) { - case nir_intrinsic_ssbo_atomic_add: - op = "add"; - break; - case nir_intrinsic_ssbo_atomic_imin: - op = "smin"; - break; - case nir_intrinsic_ssbo_atomic_umin: - op = "umin"; - break; - case nir_intrinsic_ssbo_atomic_imax: - op = "smax"; - break; - case nir_intrinsic_ssbo_atomic_umax: - op = "umax"; - break; - case nir_intrinsic_ssbo_atomic_and: - op = "and"; - break; - case nir_intrinsic_ssbo_atomic_or: - op = "or"; - break; - case nir_intrinsic_ssbo_atomic_xor: - op = "xor"; - break; - case nir_intrinsic_ssbo_atomic_exchange: - op = "swap"; - break; - case nir_intrinsic_ssbo_atomic_comp_swap: - op = "cmpswap"; - break; - default: - abort(); - } - - descriptor = ctx->abi->load_ssbo(ctx->abi, - rsrc_base, - true); - - if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap && - return_type == ctx->ac.i64) { - result = emit_ssbo_comp_swap_64(ctx, descriptor, - get_src(ctx, instr->src[1]), - get_src(ctx, instr->src[2]), - get_src(ctx, instr->src[3])); - } else { - if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { - params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0); - } - params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0); - params[arg_count++] = descriptor; - - if (LLVM_VERSION_MAJOR >= 9) { - /* XXX: The new raw/struct atomic intrinsics are buggy with - * LLVM 8, see r358579. 
- */ - params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ - params[arg_count++] = ctx->ac.i32_0; /* soffset */ - params[arg_count++] = ctx->ac.i32_0; /* slc */ - - ac_build_type_name_for_intr(return_type, type, sizeof(type)); - snprintf(name, sizeof(name), - "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type); - } else { - params[arg_count++] = ctx->ac.i32_0; /* vindex */ - params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ - params[arg_count++] = ctx->ac.i1false; /* slc */ - - assert(return_type == ctx->ac.i32); - snprintf(name, sizeof(name), - "llvm.amdgcn.buffer.atomic.%s", op); - } - - result = ac_build_intrinsic(&ctx->ac, name, return_type, params, - arg_count, 0); - } - - result = exit_waterfall(ctx, &wctx, result); - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7001); - return result; -} - -static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - struct waterfall_context wctx; - LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]); - - int elem_size_bytes = instr->dest.ssa.bit_size / 8; - int num_components = instr->num_components; - enum gl_access_qualifier access = nir_intrinsic_access(instr); - unsigned cache_policy = get_cache_policy(ctx, access, false, false); - - LLVMValueRef offset = get_src(ctx, instr->src[1]); - LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false); - LLVMValueRef vindex = ctx->ac.i32_0; - - LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa); - LLVMTypeRef def_elem_type = num_components > 1 ? 
LLVMGetElementType(def_type) : def_type; - - LLVMValueRef results[4]; - for (int i = 0; i < num_components;) { - int num_elems = num_components - i; - if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0) - num_elems = 1; - if (num_elems * elem_size_bytes > 16) - num_elems = 16 / elem_size_bytes; - int load_bytes = num_elems * elem_size_bytes; - - LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false); - - LLVMValueRef ret; - - if (load_bytes == 1) { - ret = ac_build_tbuffer_load_byte(&ctx->ac, - rsrc, - offset, - ctx->ac.i32_0, - immoffset, - cache_policy); - } else if (load_bytes == 2) { - ret = ac_build_tbuffer_load_short(&ctx->ac, - rsrc, - offset, - ctx->ac.i32_0, - immoffset, - cache_policy); - } else { - int num_channels = util_next_power_of_two(load_bytes) / 4; - bool can_speculate = access & ACCESS_CAN_REORDER; - - ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, - vindex, offset, immoffset, 0, - cache_policy, can_speculate, false); - } - - LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret))); - ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, ""); - ret = ac_trim_vector(&ctx->ac, ret, load_bytes); - - LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems); - ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, ""); - - for (unsigned j = 0; j < num_elems; j++) { - results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), ""); - } - i += num_elems; - } - - LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components); - return exit_waterfall(ctx, &wctx, ret); -} - -static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx, - struct waterfall_context *wctx, - const nir_intrinsic_instr *instr) -{ - return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]), - nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM); -} - -static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, - 
nir_intrinsic_instr *instr) -{ - struct waterfall_context wctx; - LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr); - - LLVMValueRef ret; - LLVMValueRef rsrc = rsrc_base; - LLVMValueRef offset = get_src(ctx, instr->src[1]); - int num_components = instr->num_components; - - if (ctx->abi->load_ubo) - rsrc = ctx->abi->load_ubo(ctx->abi, rsrc); - - if (instr->dest.ssa.bit_size == 64) - num_components *= 2; - - if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) { - unsigned load_bytes = instr->dest.ssa.bit_size / 8; - LLVMValueRef results[num_components]; - for (unsigned i = 0; i < num_components; ++i) { - LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, - load_bytes * i, 0); - - if (load_bytes == 1) { - results[i] = ac_build_tbuffer_load_byte(&ctx->ac, - rsrc, - offset, - ctx->ac.i32_0, - immoffset, - 0); - } else { - assert(load_bytes == 2); - results[i] = ac_build_tbuffer_load_short(&ctx->ac, - rsrc, - offset, - ctx->ac.i32_0, - immoffset, - 0); - } - } - ret = ac_build_gather_values(&ctx->ac, results, num_components); - } else { - ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, - NULL, 0, 0, true, true); - - ret = ac_trim_vector(&ctx->ac, ret, num_components); - } - - ret = LLVMBuildBitCast(ctx->ac.builder, ret, - get_def_type(ctx, &instr->dest.ssa), ""); - - return exit_waterfall(ctx, &wctx, ret); -} - -static void -get_deref_offset(struct ac_nir_context *ctx, nir_deref_instr *instr, - bool vs_in, unsigned *vertex_index_out, - LLVMValueRef *vertex_index_ref, - unsigned *const_out, LLVMValueRef *indir_out) -{ - nir_variable *var = nir_deref_instr_get_variable(instr); - nir_deref_path path; - unsigned idx_lvl = 1; - - nir_deref_path_init(&path, instr, NULL); - - if (vertex_index_out != NULL || vertex_index_ref != NULL) { - if (vertex_index_ref) { - *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index); - if (vertex_index_out) - *vertex_index_out = 0; - } else { - *vertex_index_out = 
nir_src_as_uint(path.path[idx_lvl]->arr.index); - } - ++idx_lvl; - } - - uint32_t const_offset = 0; - LLVMValueRef offset = NULL; - - if (var->data.compact) { - assert(instr->deref_type == nir_deref_type_array); - const_offset = nir_src_as_uint(instr->arr.index); - goto out; - } - - for (; path.path[idx_lvl]; ++idx_lvl) { - const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type; - if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) { - unsigned index = path.path[idx_lvl]->strct.index; - - for (unsigned i = 0; i < index; i++) { - const struct glsl_type *ft = glsl_get_struct_field(parent_type, i); - const_offset += glsl_count_attribute_slots(ft, vs_in); - } - } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) { - unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in); - if (nir_src_is_const(path.path[idx_lvl]->arr.index)) { - const_offset += size * - nir_src_as_uint(path.path[idx_lvl]->arr.index); - } else { - LLVMValueRef array_off = LLVMBuildMul(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, size, 0), - get_src(ctx, path.path[idx_lvl]->arr.index), ""); - if (offset) - offset = LLVMBuildAdd(ctx->ac.builder, offset, array_off, ""); - else - offset = array_off; - } - } else - unreachable("Uhandled deref type in get_deref_instr_offset"); - } + LLVMValueRef mask[] = {ctx->i32_0, ctx->i32_1, LLVMConstInt(ctx->i32, 2, false), + LLVMConstInt(ctx->i32, 3, false)}; + + unsigned src_elements = ac_get_llvm_num_components(src); + + if (count == src_elements) { + assert(start == 0); + return src; + } else if (count == 1) { + assert(start < src_elements); + return LLVMBuildExtractElement(ctx->builder, src, mask[start], ""); + } else { + assert(start + count <= src_elements); + assert(count <= 4); + LLVMValueRef swizzle = LLVMConstVector(&mask[start], count); + return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, ""); + } +} + +static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qualifier 
access, + bool may_store_unaligned, bool writeonly_memory) +{ + unsigned cache_policy = 0; + + /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All + * store opcodes not aligned to a dword are affected. The only way to + * get unaligned stores is through shader images. + */ + if (((may_store_unaligned && ctx->ac.chip_class == GFX6) || + /* If this is write-only, don't keep data in L1 to prevent + * evicting L1 cache lines that may be needed by other + * instructions. + */ + writeonly_memory || access & (ACCESS_COHERENT | ACCESS_VOLATILE))) { + cache_policy |= ac_glc; + } + + if (access & ACCESS_STREAM_CACHE_POLICY) + cache_policy |= ac_slc | ac_glc; + + return cache_policy; +} + +static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx, struct waterfall_context *wctx, + const nir_intrinsic_instr *instr, nir_src src) +{ + return enter_waterfall(ctx, wctx, get_src(ctx, src), + nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM); +} + +static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) +{ + if (ctx->ac.postponed_kill) { + LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, ""); + ac_build_ifcc(&ctx->ac, cond, 7000); + } + + LLVMValueRef src_data = get_src(ctx, instr->src[0]); + int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8; + unsigned writemask = nir_intrinsic_write_mask(instr); + enum gl_access_qualifier access = nir_intrinsic_access(instr); + bool writeonly_memory = access & ACCESS_NON_READABLE; + unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory); + + struct waterfall_context wctx; + LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]); + + LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true); + LLVMValueRef base_data = src_data; + base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components); + LLVMValueRef base_offset = get_src(ctx, instr->src[2]); + + while (writemask) { + int 
start, count; + LLVMValueRef data, offset; + LLVMTypeRef data_type; + + u_bit_scan_consecutive_range(&writemask, &start, &count); + + /* Due to an LLVM limitation with LLVM < 9, split 3-element + * writes into a 2-element and a 1-element write. */ + if (count == 3 && (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) { + writemask |= 1 << (start + 2); + count = 2; + } + int num_bytes = count * elem_size_bytes; /* count in bytes */ + + /* we can only store 4 DWords at the same time. + * can only happen for 64 Bit vectors. */ + if (num_bytes > 16) { + writemask |= ((1u << (count - 2)) - 1u) << (start + 2); + count = 2; + num_bytes = 16; + } + + /* check alignment of 16 Bit stores */ + if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) { + writemask |= ((1u << (count - 1)) - 1u) << (start + 1); + count = 1; + num_bytes = 2; + } + + /* Due to alignment issues, split stores of 8-bit/16-bit + * vectors. + */ + if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) { + writemask |= ((1u << (count - 1)) - 1u) << (start + 1); + count = 1; + num_bytes = elem_size_bytes; + } + + data = extract_vector_range(&ctx->ac, base_data, start, count); + + offset = LLVMBuildAdd(ctx->ac.builder, base_offset, + LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), ""); + + if (num_bytes == 1) { + ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy); + } else if (num_bytes == 2) { + ac_build_tbuffer_store_short(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy); + } else { + int num_channels = num_bytes / 4; + + switch (num_bytes) { + case 16: /* v4f32 */ + data_type = ctx->ac.v4f32; + break; + case 12: /* v3f32 */ + data_type = ctx->ac.v3f32; + break; + case 8: /* v2f32 */ + data_type = ctx->ac.v2f32; + break; + case 4: /* f32 */ + data_type = ctx->ac.f32; + break; + default: + unreachable("Malformed vector store."); + } + data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, ""); + + 
ac_build_buffer_store_dword(&ctx->ac, rsrc, data, num_channels, offset, ctx->ac.i32_0, 0, + cache_policy); + } + } + + exit_waterfall(ctx, &wctx, NULL); + + if (ctx->ac.postponed_kill) + ac_build_endif(&ctx->ac, 7000); +} + +static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx, LLVMValueRef descriptor, + LLVMValueRef offset, LLVMValueRef compare, + LLVMValueRef exchange) +{ + LLVMBasicBlockRef start_block = NULL, then_block = NULL; + if (ctx->abi->robust_buffer_access) { + LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2); + + LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, ""); + start_block = LLVMGetInsertBlock(ctx->ac.builder); + + ac_build_ifcc(&ctx->ac, cond, -1); + + then_block = LLVMGetInsertBlock(ctx->ac.builder); + } + + LLVMValueRef ptr_parts[2] = { + ac_llvm_extract_elem(&ctx->ac, descriptor, 0), + LLVMBuildAnd(ctx->ac.builder, ac_llvm_extract_elem(&ctx->ac, descriptor, 1), + LLVMConstInt(ctx->ac.i32, 65535, 0), "")}; + + ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, ""); + ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, ""); + + offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, ""); + + LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2); + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, ""); + ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, ""); + ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL), + ""); + + LLVMValueRef result = + ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as"); + result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, ""); + + if (ctx->abi->robust_buffer_access) { + ac_build_endif(&ctx->ac, -1); + + LLVMBasicBlockRef incoming_blocks[2] = { + start_block, + then_block, + }; + + LLVMValueRef incoming_values[2] = { + LLVMConstInt(ctx->ac.i64, 0, 0), + result, + }; + LLVMValueRef ret = 
LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, ""); + LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2); + return ret; + } else { + return result; + } +} + +static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) +{ + if (ctx->ac.postponed_kill) { + LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, ""); + ac_build_ifcc(&ctx->ac, cond, 7001); + } + + LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2])); + const char *op; + char name[64], type[8]; + LLVMValueRef params[6], descriptor; + LLVMValueRef result; + int arg_count = 0; + + struct waterfall_context wctx; + LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]); + + switch (instr->intrinsic) { + case nir_intrinsic_ssbo_atomic_add: + op = "add"; + break; + case nir_intrinsic_ssbo_atomic_imin: + op = "smin"; + break; + case nir_intrinsic_ssbo_atomic_umin: + op = "umin"; + break; + case nir_intrinsic_ssbo_atomic_imax: + op = "smax"; + break; + case nir_intrinsic_ssbo_atomic_umax: + op = "umax"; + break; + case nir_intrinsic_ssbo_atomic_and: + op = "and"; + break; + case nir_intrinsic_ssbo_atomic_or: + op = "or"; + break; + case nir_intrinsic_ssbo_atomic_xor: + op = "xor"; + break; + case nir_intrinsic_ssbo_atomic_exchange: + op = "swap"; + break; + case nir_intrinsic_ssbo_atomic_comp_swap: + op = "cmpswap"; + break; + default: + abort(); + } + + descriptor = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true); + + if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap && return_type == ctx->ac.i64) { + result = emit_ssbo_comp_swap_64(ctx, descriptor, get_src(ctx, instr->src[1]), + get_src(ctx, instr->src[2]), get_src(ctx, instr->src[3])); + } else { + if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { + params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0); + } + params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0); + 
params[arg_count++] = descriptor; + + if (LLVM_VERSION_MAJOR >= 9) { + /* XXX: The new raw/struct atomic intrinsics are buggy with + * LLVM 8, see r358579. + */ + params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ + params[arg_count++] = ctx->ac.i32_0; /* soffset */ + params[arg_count++] = ctx->ac.i32_0; /* slc */ + + ac_build_type_name_for_intr(return_type, type, sizeof(type)); + snprintf(name, sizeof(name), "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type); + } else { + params[arg_count++] = ctx->ac.i32_0; /* vindex */ + params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ + params[arg_count++] = ctx->ac.i1false; /* slc */ + + assert(return_type == ctx->ac.i32); + snprintf(name, sizeof(name), "llvm.amdgcn.buffer.atomic.%s", op); + } + + result = ac_build_intrinsic(&ctx->ac, name, return_type, params, arg_count, 0); + } + + result = exit_waterfall(ctx, &wctx, result); + if (ctx->ac.postponed_kill) + ac_build_endif(&ctx->ac, 7001); + return result; +} + +static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) +{ + struct waterfall_context wctx; + LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]); + + int elem_size_bytes = instr->dest.ssa.bit_size / 8; + int num_components = instr->num_components; + enum gl_access_qualifier access = nir_intrinsic_access(instr); + unsigned cache_policy = get_cache_policy(ctx, access, false, false); + + LLVMValueRef offset = get_src(ctx, instr->src[1]); + LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false); + LLVMValueRef vindex = ctx->ac.i32_0; + + LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa); + LLVMTypeRef def_elem_type = num_components > 1 ? 
LLVMGetElementType(def_type) : def_type; + + LLVMValueRef results[4]; + for (int i = 0; i < num_components;) { + int num_elems = num_components - i; + if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0) + num_elems = 1; + if (num_elems * elem_size_bytes > 16) + num_elems = 16 / elem_size_bytes; + int load_bytes = num_elems * elem_size_bytes; + + LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false); + + LLVMValueRef ret; + + if (load_bytes == 1) { + ret = ac_build_tbuffer_load_byte(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset, + cache_policy); + } else if (load_bytes == 2) { + ret = ac_build_tbuffer_load_short(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset, + cache_policy); + } else { + int num_channels = util_next_power_of_two(load_bytes) / 4; + bool can_speculate = access & ACCESS_CAN_REORDER; + + ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, vindex, offset, immoffset, 0, + cache_policy, can_speculate, false); + } + + LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret))); + ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, ""); + ret = ac_trim_vector(&ctx->ac, ret, load_bytes); + + LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems); + ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, ""); + + for (unsigned j = 0; j < num_elems; j++) { + results[i + j] = + LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), ""); + } + i += num_elems; + } + + LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components); + return exit_waterfall(ctx, &wctx, ret); +} + +static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx, struct waterfall_context *wctx, + const nir_intrinsic_instr *instr) +{ + return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]), + nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM); +} + +static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) +{ + 
struct waterfall_context wctx; + LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr); + + LLVMValueRef ret; + LLVMValueRef rsrc = rsrc_base; + LLVMValueRef offset = get_src(ctx, instr->src[1]); + int num_components = instr->num_components; + + if (ctx->abi->load_ubo) + rsrc = ctx->abi->load_ubo(ctx->abi, rsrc); + + if (instr->dest.ssa.bit_size == 64) + num_components *= 2; + + if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) { + unsigned load_bytes = instr->dest.ssa.bit_size / 8; + LLVMValueRef results[num_components]; + for (unsigned i = 0; i < num_components; ++i) { + LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, load_bytes * i, 0); + + if (load_bytes == 1) { + results[i] = + ac_build_tbuffer_load_byte(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset, 0); + } else { + assert(load_bytes == 2); + results[i] = + ac_build_tbuffer_load_short(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset, 0); + } + } + ret = ac_build_gather_values(&ctx->ac, results, num_components); + } else { + ret = + ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL, 0, 0, true, true); + + ret = ac_trim_vector(&ctx->ac, ret, num_components); + } + + ret = LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); + + return exit_waterfall(ctx, &wctx, ret); +} + +static void get_deref_offset(struct ac_nir_context *ctx, nir_deref_instr *instr, bool vs_in, + unsigned *vertex_index_out, LLVMValueRef *vertex_index_ref, + unsigned *const_out, LLVMValueRef *indir_out) +{ + nir_variable *var = nir_deref_instr_get_variable(instr); + nir_deref_path path; + unsigned idx_lvl = 1; + + nir_deref_path_init(&path, instr, NULL); + + if (vertex_index_out != NULL || vertex_index_ref != NULL) { + if (vertex_index_ref) { + *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index); + if (vertex_index_out) + *vertex_index_out = 0; + } else { + *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index); + } + ++idx_lvl; + } + 
+ uint32_t const_offset = 0; + LLVMValueRef offset = NULL; + + if (var->data.compact) { + assert(instr->deref_type == nir_deref_type_array); + const_offset = nir_src_as_uint(instr->arr.index); + goto out; + } + + for (; path.path[idx_lvl]; ++idx_lvl) { + const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type; + if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) { + unsigned index = path.path[idx_lvl]->strct.index; + + for (unsigned i = 0; i < index; i++) { + const struct glsl_type *ft = glsl_get_struct_field(parent_type, i); + const_offset += glsl_count_attribute_slots(ft, vs_in); + } + } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) { + unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in); + if (nir_src_is_const(path.path[idx_lvl]->arr.index)) { + const_offset += size * nir_src_as_uint(path.path[idx_lvl]->arr.index); + } else { + LLVMValueRef array_off = + LLVMBuildMul(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, size, 0), + get_src(ctx, path.path[idx_lvl]->arr.index), ""); + if (offset) + offset = LLVMBuildAdd(ctx->ac.builder, offset, array_off, ""); + else + offset = array_off; + } + } else + unreachable("Uhandled deref type in get_deref_instr_offset"); + } out: - nir_deref_path_finish(&path); + nir_deref_path_finish(&path); + + if (const_offset && offset) + offset = + LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, const_offset, 0), ""); - if (const_offset && offset) - offset = LLVMBuildAdd(ctx->ac.builder, offset, - LLVMConstInt(ctx->ac.i32, const_offset, 0), - ""); - - *const_out = const_offset; - *indir_out = offset; -} - -static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr, - bool load_inputs) -{ - LLVMValueRef result; - LLVMValueRef vertex_index = NULL; - LLVMValueRef indir_index = NULL; - unsigned const_index = 0; - - nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); - - unsigned 
location = var->data.location; - unsigned driver_location = var->data.driver_location; - const bool is_patch = var->data.patch || - var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || - var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER; - const bool is_compact = var->data.compact; - - get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), - false, NULL, is_patch ? NULL : &vertex_index, - &const_index, &indir_index); - - LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa); - - LLVMTypeRef src_component_type; - if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind) - src_component_type = LLVMGetElementType(dest_type); - else - src_component_type = dest_type; - - result = ctx->abi->load_tess_varyings(ctx->abi, src_component_type, - vertex_index, indir_index, - const_index, location, driver_location, - var->data.location_frac, - instr->num_components, - is_patch, is_compact, load_inputs); - if (instr->dest.ssa.bit_size == 16) { - result = ac_to_integer(&ctx->ac, result); - result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, ""); - } - return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, ""); + *const_out = const_offset; + *indir_out = offset; } -static unsigned -type_scalar_size_bytes(const struct glsl_type *type) +static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx, nir_intrinsic_instr *instr, + bool load_inputs) { - assert(glsl_type_is_vector_or_scalar(type) || - glsl_type_is_matrix(type)); + LLVMValueRef result; + LLVMValueRef vertex_index = NULL; + LLVMValueRef indir_index = NULL; + unsigned const_index = 0; + + nir_variable *var = + nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); + + unsigned location = var->data.location; + unsigned driver_location = var->data.driver_location; + const bool is_patch = var->data.patch || var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || + var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER; + const bool is_compact = 
var->data.compact; + + get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), false, NULL, + is_patch ? NULL : &vertex_index, &const_index, &indir_index); + + LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa); + + LLVMTypeRef src_component_type; + if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind) + src_component_type = LLVMGetElementType(dest_type); + else + src_component_type = dest_type; + + result = + ctx->abi->load_tess_varyings(ctx->abi, src_component_type, vertex_index, indir_index, + const_index, location, driver_location, var->data.location_frac, + instr->num_components, is_patch, is_compact, load_inputs); + if (instr->dest.ssa.bit_size == 16) { + result = ac_to_integer(&ctx->ac, result); + result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, ""); + } + return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, ""); +} + +static unsigned type_scalar_size_bytes(const struct glsl_type *type) +{ + assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type)); return glsl_type_is_boolean(type) ? 
4 : glsl_get_bit_size(type) / 8; } -static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) +static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { - nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); - nir_variable *var = nir_deref_instr_get_variable(deref); + nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); + nir_variable *var = nir_deref_instr_get_variable(deref); - LLVMValueRef values[8]; - int idx = 0; - int ve = instr->dest.ssa.num_components; - unsigned comp = 0; - LLVMValueRef indir_index; - LLVMValueRef ret; - unsigned const_index; - unsigned stride = 4; - int mode = deref->mode; - - if (var) { - bool vs_in = ctx->stage == MESA_SHADER_VERTEX && - var->data.mode == nir_var_shader_in; - idx = var->data.driver_location; - comp = var->data.location_frac; - mode = var->data.mode; - - get_deref_offset(ctx, deref, vs_in, NULL, NULL, - &const_index, &indir_index); - - if (var->data.compact) { - stride = 1; - const_index += comp; - comp = 0; - } - } - - if (instr->dest.ssa.bit_size == 64 && - (deref->mode == nir_var_shader_in || - deref->mode == nir_var_shader_out || - deref->mode == nir_var_function_temp)) - ve *= 2; - - switch (mode) { - case nir_var_shader_in: - if (ctx->stage == MESA_SHADER_TESS_CTRL || - ctx->stage == MESA_SHADER_TESS_EVAL) { - return load_tess_varyings(ctx, instr, true); - } - - if (ctx->stage == MESA_SHADER_GEOMETRY) { - LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); - LLVMValueRef indir_index; - unsigned const_index, vertex_index; - get_deref_offset(ctx, deref, false, &vertex_index, NULL, - &const_index, &indir_index); - assert(indir_index == NULL); - - return ctx->abi->load_inputs(ctx->abi, var->data.location, - var->data.driver_location, - var->data.location_frac, - instr->num_components, vertex_index, const_index, type); - } - - for (unsigned chan = comp; chan < ve + comp; 
chan++) { - if (indir_index) { - unsigned count = glsl_count_attribute_slots( - var->type, - ctx->stage == MESA_SHADER_VERTEX); - count -= chan / 4; - LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->abi->inputs + idx + chan, count, - stride, false, true); - - values[chan] = LLVMBuildExtractElement(ctx->ac.builder, - tmp_vec, - indir_index, ""); - } else - values[chan] = ctx->abi->inputs[idx + chan + const_index * stride]; - } - break; - case nir_var_function_temp: - for (unsigned chan = 0; chan < ve; chan++) { - if (indir_index) { - unsigned count = glsl_count_attribute_slots( - var->type, false); - count -= chan / 4; - LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->locals + idx + chan, count, - stride, true, true); - - values[chan] = LLVMBuildExtractElement(ctx->ac.builder, - tmp_vec, - indir_index, ""); - } else { - values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], ""); - } - } - break; - case nir_var_shader_out: - if (ctx->stage == MESA_SHADER_TESS_CTRL) { - return load_tess_varyings(ctx, instr, false); - } - - if (ctx->stage == MESA_SHADER_FRAGMENT && - var->data.fb_fetch_output && - ctx->abi->emit_fbfetch) - return ctx->abi->emit_fbfetch(ctx->abi); - - for (unsigned chan = comp; chan < ve + comp; chan++) { - if (indir_index) { - unsigned count = glsl_count_attribute_slots( - var->type, false); - count -= chan / 4; - LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->abi->outputs + idx + chan, count, - stride, true, true); - - values[chan] = LLVMBuildExtractElement(ctx->ac.builder, - tmp_vec, - indir_index, ""); - } else { - values[chan] = LLVMBuildLoad(ctx->ac.builder, - ctx->abi->outputs[idx + chan + const_index * stride], - ""); - } - } - break; - case nir_var_mem_global: { - LLVMValueRef address = get_src(ctx, instr->src[0]); - LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa); - unsigned explicit_stride = 
glsl_get_explicit_stride(deref->type); - unsigned natural_stride = type_scalar_size_bytes(deref->type); - unsigned stride = explicit_stride ? explicit_stride : natural_stride; - int elem_size_bytes = ac_get_elem_bits(&ctx->ac, result_type) / 8; - bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4; - - if (stride != natural_stride || split_loads) { - if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind) - result_type = LLVMGetElementType(result_type); - - LLVMTypeRef ptr_type = LLVMPointerType(result_type, - LLVMGetPointerAddressSpace(LLVMTypeOf(address))); - address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); - - for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) { - LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0); - values[i] = LLVMBuildLoad(ctx->ac.builder, - ac_build_gep_ptr(&ctx->ac, address, offset), ""); - - if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) - LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic); - } - return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components); - } else { - LLVMTypeRef ptr_type = LLVMPointerType(result_type, - LLVMGetPointerAddressSpace(LLVMTypeOf(address))); - address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); - LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, ""); - - if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) - LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic); - return val; - } - } - default: - unreachable("unhandle variable mode"); - } - ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp); - return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); -} - -static void -visit_store_var(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, - ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7002); - } - - 
nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); - nir_variable *var = nir_deref_instr_get_variable(deref); - - LLVMValueRef temp_ptr, value; - int idx = 0; - unsigned comp = 0; - LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1])); - int writemask = instr->const_index[0]; - LLVMValueRef indir_index; - unsigned const_index; - - if (var) { - get_deref_offset(ctx, deref, false, - NULL, NULL, &const_index, &indir_index); - idx = var->data.driver_location; - comp = var->data.location_frac; - - if (var->data.compact) { - const_index += comp; - comp = 0; - } - } - - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 && - (deref->mode == nir_var_shader_out || - deref->mode == nir_var_function_temp)) { - - src = LLVMBuildBitCast(ctx->ac.builder, src, - LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2), - ""); - - writemask = widen_mask(writemask, 2); - } - - writemask = writemask << comp; - - switch (deref->mode) { - case nir_var_shader_out: - - if (ctx->stage == MESA_SHADER_TESS_CTRL) { - LLVMValueRef vertex_index = NULL; - LLVMValueRef indir_index = NULL; - unsigned const_index = 0; - const bool is_patch = var->data.patch || - var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || - var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER; - - get_deref_offset(ctx, deref, false, NULL, - is_patch ? 
NULL : &vertex_index, - &const_index, &indir_index); - - ctx->abi->store_tcs_outputs(ctx->abi, var, - vertex_index, indir_index, - const_index, src, writemask); - break; - } - - for (unsigned chan = 0; chan < 8; chan++) { - int stride = 4; - if (!(writemask & (1 << chan))) - continue; - - value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp); - - if (var->data.compact) - stride = 1; - if (indir_index) { - unsigned count = glsl_count_attribute_slots( - var->type, false); - count -= chan / 4; - LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->abi->outputs + idx + chan, count, - stride, true, true); - - tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, - value, indir_index, ""); - build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan, - count, stride, tmp_vec); - - } else { - temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride]; - - LLVMBuildStore(ctx->ac.builder, value, temp_ptr); - } - } - break; - case nir_var_function_temp: - for (unsigned chan = 0; chan < 8; chan++) { - if (!(writemask & (1 << chan))) - continue; - - value = ac_llvm_extract_elem(&ctx->ac, src, chan); - if (indir_index) { - unsigned count = glsl_count_attribute_slots( - var->type, false); - count -= chan / 4; - LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->locals + idx + chan, count, - 4, true, true); - - tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, - value, indir_index, ""); - build_store_values_extended(&ctx->ac, ctx->locals + idx + chan, - count, 4, tmp_vec); - } else { - temp_ptr = ctx->locals[idx + chan + const_index * 4]; - - LLVMBuildStore(ctx->ac.builder, value, temp_ptr); - } - } - break; - - case nir_var_mem_global: { - int writemask = instr->const_index[0]; - LLVMValueRef address = get_src(ctx, instr->src[0]); - LLVMValueRef val = get_src(ctx, instr->src[1]); - - unsigned explicit_stride = glsl_get_explicit_stride(deref->type); - unsigned natural_stride = 
type_scalar_size_bytes(deref->type); - unsigned stride = explicit_stride ? explicit_stride : natural_stride; - int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(val)) / 8; - bool split_stores = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4; - - LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val), - LLVMGetPointerAddressSpace(LLVMTypeOf(address))); - address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); - - if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 && - stride == natural_stride && !split_stores) { - LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val), - LLVMGetPointerAddressSpace(LLVMTypeOf(address))); - address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); - - val = LLVMBuildBitCast(ctx->ac.builder, val, - LLVMGetElementType(LLVMTypeOf(address)), ""); - LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, val, address); - - if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) - LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic); - } else { - LLVMTypeRef val_type = LLVMTypeOf(val); - if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind) - val_type = LLVMGetElementType(val_type); - - LLVMTypeRef ptr_type = LLVMPointerType(val_type, - LLVMGetPointerAddressSpace(LLVMTypeOf(address))); - address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); - for (unsigned chan = 0; chan < 4; chan++) { - if (!(writemask & (1 << chan))) - continue; - - LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan * stride / natural_stride, 0); - - LLVMValueRef ptr = ac_build_gep_ptr(&ctx->ac, address, offset); - LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val, - chan); - src = LLVMBuildBitCast(ctx->ac.builder, src, - LLVMGetElementType(LLVMTypeOf(ptr)), ""); - LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, src, ptr); - - if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) - LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic); - } - } - break; - } - default: - 
abort(); - break; - } + LLVMValueRef values[8]; + int idx = 0; + int ve = instr->dest.ssa.num_components; + unsigned comp = 0; + LLVMValueRef indir_index; + LLVMValueRef ret; + unsigned const_index; + unsigned stride = 4; + int mode = deref->mode; + + if (var) { + bool vs_in = ctx->stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in; + idx = var->data.driver_location; + comp = var->data.location_frac; + mode = var->data.mode; + + get_deref_offset(ctx, deref, vs_in, NULL, NULL, &const_index, &indir_index); + + if (var->data.compact) { + stride = 1; + const_index += comp; + comp = 0; + } + } + + if (instr->dest.ssa.bit_size == 64 && + (deref->mode == nir_var_shader_in || deref->mode == nir_var_shader_out || + deref->mode == nir_var_function_temp)) + ve *= 2; + + switch (mode) { + case nir_var_shader_in: + if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) { + return load_tess_varyings(ctx, instr, true); + } + + if (ctx->stage == MESA_SHADER_GEOMETRY) { + LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); + LLVMValueRef indir_index; + unsigned const_index, vertex_index; + get_deref_offset(ctx, deref, false, &vertex_index, NULL, &const_index, &indir_index); + assert(indir_index == NULL); + + return ctx->abi->load_inputs(ctx->abi, var->data.location, var->data.driver_location, + var->data.location_frac, instr->num_components, vertex_index, + const_index, type); + } + + for (unsigned chan = comp; chan < ve + comp; chan++) { + if (indir_index) { + unsigned count = + glsl_count_attribute_slots(var->type, ctx->stage == MESA_SHADER_VERTEX); + count -= chan / 4; + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->abi->inputs + idx + chan, count, stride, false, true); + + values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); + } else + values[chan] = ctx->abi->inputs[idx + chan + const_index * stride]; + } + break; + case nir_var_function_temp: + for 
(unsigned chan = 0; chan < ve; chan++) { + if (indir_index) { + unsigned count = glsl_count_attribute_slots(var->type, false); + count -= chan / 4; + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->locals + idx + chan, count, stride, true, true); + + values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); + } else { + values[chan] = + LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], ""); + } + } + break; + case nir_var_shader_out: + if (ctx->stage == MESA_SHADER_TESS_CTRL) { + return load_tess_varyings(ctx, instr, false); + } + + if (ctx->stage == MESA_SHADER_FRAGMENT && var->data.fb_fetch_output && ctx->abi->emit_fbfetch) + return ctx->abi->emit_fbfetch(ctx->abi); + + for (unsigned chan = comp; chan < ve + comp; chan++) { + if (indir_index) { + unsigned count = glsl_count_attribute_slots(var->type, false); + count -= chan / 4; + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->abi->outputs + idx + chan, count, stride, true, true); + + values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); + } else { + values[chan] = LLVMBuildLoad(ctx->ac.builder, + ctx->abi->outputs[idx + chan + const_index * stride], ""); + } + } + break; + case nir_var_mem_global: { + LLVMValueRef address = get_src(ctx, instr->src[0]); + LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa); + unsigned explicit_stride = glsl_get_explicit_stride(deref->type); + unsigned natural_stride = type_scalar_size_bytes(deref->type); + unsigned stride = explicit_stride ? 
explicit_stride : natural_stride; + int elem_size_bytes = ac_get_elem_bits(&ctx->ac, result_type) / 8; + bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4; + + if (stride != natural_stride || split_loads) { + if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind) + result_type = LLVMGetElementType(result_type); + + LLVMTypeRef ptr_type = + LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address))); + address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, ""); + + for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) { + LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0); + values[i] = + LLVMBuildLoad(ctx->ac.builder, ac_build_gep_ptr(&ctx->ac, address, offset), ""); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic); + } + return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components); + } else { + LLVMTypeRef ptr_type = + LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address))); + address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, ""); + LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, ""); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic); + return val; + } + } + default: + unreachable("unhandle variable mode"); + } + ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp); + return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); +} + +static void visit_store_var(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) +{ + if (ctx->ac.postponed_kill) { + LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, ""); + ac_build_ifcc(&ctx->ac, cond, 7002); + } + + nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); + nir_variable *var = nir_deref_instr_get_variable(deref); + + 
LLVMValueRef temp_ptr, value; + int idx = 0; + unsigned comp = 0; + LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1])); + int writemask = instr->const_index[0]; + LLVMValueRef indir_index; + unsigned const_index; + + if (var) { + get_deref_offset(ctx, deref, false, NULL, NULL, &const_index, &indir_index); + idx = var->data.driver_location; + comp = var->data.location_frac; + + if (var->data.compact) { + const_index += comp; + comp = 0; + } + } + + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 && + (deref->mode == nir_var_shader_out || deref->mode == nir_var_function_temp)) { + + src = LLVMBuildBitCast(ctx->ac.builder, src, + LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2), ""); + + writemask = widen_mask(writemask, 2); + } + + writemask = writemask << comp; + + switch (deref->mode) { + case nir_var_shader_out: + + if (ctx->stage == MESA_SHADER_TESS_CTRL) { + LLVMValueRef vertex_index = NULL; + LLVMValueRef indir_index = NULL; + unsigned const_index = 0; + const bool is_patch = var->data.patch || + var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || + var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER; + + get_deref_offset(ctx, deref, false, NULL, is_patch ? 
NULL : &vertex_index, &const_index, + &indir_index); + + ctx->abi->store_tcs_outputs(ctx->abi, var, vertex_index, indir_index, const_index, src, + writemask); + break; + } + + for (unsigned chan = 0; chan < 8; chan++) { + int stride = 4; + if (!(writemask & (1 << chan))) + continue; + + value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp); + + if (var->data.compact) + stride = 1; + if (indir_index) { + unsigned count = glsl_count_attribute_slots(var->type, false); + count -= chan / 4; + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->abi->outputs + idx + chan, count, stride, true, true); + + tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, value, indir_index, ""); + build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan, count, stride, + tmp_vec); + + } else { + temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride]; + + LLVMBuildStore(ctx->ac.builder, value, temp_ptr); + } + } + break; + case nir_var_function_temp: + for (unsigned chan = 0; chan < 8; chan++) { + if (!(writemask & (1 << chan))) + continue; + + value = ac_llvm_extract_elem(&ctx->ac, src, chan); + if (indir_index) { + unsigned count = glsl_count_attribute_slots(var->type, false); + count -= chan / 4; + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->locals + idx + chan, count, 4, true, true); + + tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, value, indir_index, ""); + build_store_values_extended(&ctx->ac, ctx->locals + idx + chan, count, 4, tmp_vec); + } else { + temp_ptr = ctx->locals[idx + chan + const_index * 4]; + + LLVMBuildStore(ctx->ac.builder, value, temp_ptr); + } + } + break; + + case nir_var_mem_global: { + int writemask = instr->const_index[0]; + LLVMValueRef address = get_src(ctx, instr->src[0]); + LLVMValueRef val = get_src(ctx, instr->src[1]); + + unsigned explicit_stride = glsl_get_explicit_stride(deref->type); + unsigned natural_stride = type_scalar_size_bytes(deref->type); + unsigned 
stride = explicit_stride ? explicit_stride : natural_stride; + int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(val)) / 8; + bool split_stores = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4; + + LLVMTypeRef ptr_type = + LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address))); + address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, ""); + + if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 && stride == natural_stride && + !split_stores) { + LLVMTypeRef ptr_type = + LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address))); + address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, ""); + + val = LLVMBuildBitCast(ctx->ac.builder, val, LLVMGetElementType(LLVMTypeOf(address)), ""); + LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, val, address); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic); + } else { + LLVMTypeRef val_type = LLVMTypeOf(val); + if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind) + val_type = LLVMGetElementType(val_type); + + LLVMTypeRef ptr_type = + LLVMPointerType(val_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address))); + address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, ""); + for (unsigned chan = 0; chan < 4; chan++) { + if (!(writemask & (1 << chan))) + continue; + + LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan * stride / natural_stride, 0); + + LLVMValueRef ptr = ac_build_gep_ptr(&ctx->ac, address, offset); + LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val, chan); + src = LLVMBuildBitCast(ctx->ac.builder, src, LLVMGetElementType(LLVMTypeOf(ptr)), ""); + LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, src, ptr); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic); + } + } + break; + } + default: + abort(); + break; + } - if (ctx->ac.postponed_kill) - 
ac_build_endif(&ctx->ac, 7002); + if (ctx->ac.postponed_kill) + ac_build_endif(&ctx->ac, 7002); } static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array) { - switch (dim) { - case GLSL_SAMPLER_DIM_BUF: - return 1; - case GLSL_SAMPLER_DIM_1D: - return array ? 2 : 1; - case GLSL_SAMPLER_DIM_2D: - return array ? 3 : 2; - case GLSL_SAMPLER_DIM_MS: - return array ? 4 : 3; - case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - return 3; - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_SUBPASS: - return 2; - case GLSL_SAMPLER_DIM_SUBPASS_MS: - return 3; - default: - break; - } - return 0; + switch (dim) { + case GLSL_SAMPLER_DIM_BUF: + return 1; + case GLSL_SAMPLER_DIM_1D: + return array ? 2 : 1; + case GLSL_SAMPLER_DIM_2D: + return array ? 3 : 2; + case GLSL_SAMPLER_DIM_MS: + return array ? 4 : 3; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + return 3; + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_SUBPASS: + return 2; + case GLSL_SAMPLER_DIM_SUBPASS_MS: + return 3; + default: + break; + } + return 0; } static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, - LLVMValueRef coord_x, LLVMValueRef coord_y, - LLVMValueRef coord_z, - LLVMValueRef sample_index, - LLVMValueRef fmask_desc_ptr) -{ - unsigned sample_chan = coord_z ? 3 : 2; - LLVMValueRef addr[4] = {coord_x, coord_y, coord_z}; - addr[sample_chan] = sample_index; + LLVMValueRef coord_x, LLVMValueRef coord_y, + LLVMValueRef coord_z, LLVMValueRef sample_index, + LLVMValueRef fmask_desc_ptr) +{ + unsigned sample_chan = coord_z ? 
3 : 2; + LLVMValueRef addr[4] = {coord_x, coord_y, coord_z}; + addr[sample_chan] = sample_index; - ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, coord_z != NULL); - return addr[sample_chan]; + ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, coord_z != NULL); + return addr[sample_chan]; } static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr) { - assert(instr->src[0].is_ssa); - return nir_instr_as_deref(instr->src[0].ssa->parent_instr); + assert(instr->src[0].is_ssa); + return nir_instr_as_deref(instr->src[0].ssa->parent_instr); } static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr, LLVMValueRef dynamic_index, - enum ac_descriptor_type desc_type, - bool write) + enum ac_descriptor_type desc_type, bool write) { - nir_deref_instr *deref_instr = - instr->src[0].ssa->parent_instr->type == nir_instr_type_deref ? - nir_instr_as_deref(instr->src[0].ssa->parent_instr) : NULL; - - return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, dynamic_index, true, write); -} - -static void get_image_coords(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr, - LLVMValueRef dynamic_desc_index, - struct ac_image_args *args, - enum glsl_sampler_dim dim, - bool is_array) -{ - LLVMValueRef src0 = get_src(ctx, instr->src[1]); - LLVMValueRef masks[] = { - LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), - LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), - }; - LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0); - - int count; - ASSERTED bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS || - dim == GLSL_SAMPLER_DIM_SUBPASS_MS); - bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || - dim == GLSL_SAMPLER_DIM_SUBPASS_MS); - bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D; - assert(!add_frag_pos && "Input attachments should be lowered by this point."); - count = 
image_type_to_components_count(dim, is_array); - - if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load || - instr->intrinsic == nir_intrinsic_bindless_image_load)) { - LLVMValueRef fmask_load_address[3]; - - fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); - fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], ""); - if (is_array) - fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], ""); - else - fmask_load_address[2] = NULL; - - sample_index = adjust_sample_index_using_fmask(&ctx->ac, - fmask_load_address[0], - fmask_load_address[1], - fmask_load_address[2], - sample_index, - get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), - AC_DESC_FMASK, &instr->instr, dynamic_desc_index, true, false)); - } - if (count == 1 && !gfx9_1d) { - if (instr->src[1].ssa->num_components) - args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); - else - args->coords[0] = src0; - } else { - int chan; - if (is_ms) - count--; - for (chan = 0; chan < count; ++chan) { - args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan); - } - - if (gfx9_1d) { - if (is_array) { - args->coords[2] = args->coords[1]; - args->coords[1] = ctx->ac.i32_0; - } else - args->coords[1] = ctx->ac.i32_0; - count++; - } - if (ctx->ac.chip_class == GFX9 && - dim == GLSL_SAMPLER_DIM_2D && - !is_array) { - /* The hw can't bind a slice of a 3D image as a 2D - * image, because it ignores BASE_ARRAY if the target - * is 3D. The workaround is to read BASE_ARRAY and set - * it as the 3rd address operand for all 2D images. 
- */ - LLVMValueRef first_layer, const5, mask; - - const5 = LLVMConstInt(ctx->ac.i32, 5, 0); - mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0); - first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, ""); - first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, ""); - - args->coords[count] = first_layer; - count++; - } - - - if (is_ms) { - args->coords[count] = sample_index; - count++; - } - } + nir_deref_instr *deref_instr = instr->src[0].ssa->parent_instr->type == nir_instr_type_deref + ? nir_instr_as_deref(instr->src[0].ssa->parent_instr) + : NULL; + + return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, dynamic_index, true, write); +} + +static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr, + LLVMValueRef dynamic_desc_index, struct ac_image_args *args, + enum glsl_sampler_dim dim, bool is_array) +{ + LLVMValueRef src0 = get_src(ctx, instr->src[1]); + LLVMValueRef masks[] = { + LLVMConstInt(ctx->ac.i32, 0, false), + LLVMConstInt(ctx->ac.i32, 1, false), + LLVMConstInt(ctx->ac.i32, 2, false), + LLVMConstInt(ctx->ac.i32, 3, false), + }; + LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0); + + int count; + ASSERTED bool add_frag_pos = + (dim == GLSL_SAMPLER_DIM_SUBPASS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS); + bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS); + bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D; + assert(!add_frag_pos && "Input attachments should be lowered by this point."); + count = image_type_to_components_count(dim, is_array); + + if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load || + instr->intrinsic == nir_intrinsic_bindless_image_load)) { + LLVMValueRef fmask_load_address[3]; + + fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); + fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, 
masks[1], ""); + if (is_array) + fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], ""); + else + fmask_load_address[2] = NULL; + + sample_index = adjust_sample_index_using_fmask( + &ctx->ac, fmask_load_address[0], fmask_load_address[1], fmask_load_address[2], + sample_index, + get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), AC_DESC_FMASK, + &instr->instr, dynamic_desc_index, true, false)); + } + if (count == 1 && !gfx9_1d) { + if (instr->src[1].ssa->num_components) + args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); + else + args->coords[0] = src0; + } else { + int chan; + if (is_ms) + count--; + for (chan = 0; chan < count; ++chan) { + args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan); + } + + if (gfx9_1d) { + if (is_array) { + args->coords[2] = args->coords[1]; + args->coords[1] = ctx->ac.i32_0; + } else + args->coords[1] = ctx->ac.i32_0; + count++; + } + if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_2D && !is_array) { + /* The hw can't bind a slice of a 3D image as a 2D + * image, because it ignores BASE_ARRAY if the target + * is 3D. The workaround is to read BASE_ARRAY and set + * it as the 3rd address operand for all 2D images. 
+ */ + LLVMValueRef first_layer, const5, mask; + + const5 = LLVMConstInt(ctx->ac.i32, 5, 0); + mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0); + first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, ""); + first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, ""); + + args->coords[count] = first_layer; + count++; + } + + if (is_ms) { + args->coords[count] = sample_index; + count++; + } + } } static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr, - LLVMValueRef dynamic_index, - bool write, bool atomic) + LLVMValueRef dynamic_index, bool write, bool atomic) { - LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, write); - if (ctx->ac.chip_class == GFX9 && LLVM_VERSION_MAJOR < 9 && atomic) { - LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), ""); - LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), ""); - stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), ""); - - LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder, - LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""), - elem_count, stride, ""); - - rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count, - LLVMConstInt(ctx->ac.i32, 2, 0), ""); - } - return rsrc; + LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, write); + if (ctx->ac.chip_class == GFX9 && LLVM_VERSION_MAJOR < 9 && atomic) { + LLVMValueRef elem_count = + LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), ""); + LLVMValueRef stride = + LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), ""); + stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), ""); + + LLVMValueRef new_elem_count = LLVMBuildSelect( + ctx->ac.builder, 
LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""), + elem_count, stride, ""); + + rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count, + LLVMConstInt(ctx->ac.i32, 2, 0), ""); + } + return rsrc; } static LLVMValueRef enter_waterfall_image(struct ac_nir_context *ctx, - struct waterfall_context *wctx, - const nir_intrinsic_instr *instr) + struct waterfall_context *wctx, + const nir_intrinsic_instr *instr) { - nir_deref_instr *deref_instr = NULL; + nir_deref_instr *deref_instr = NULL; - if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref) - deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr); + if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref) + deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr); + + LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true); + return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM); +} - LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true); - return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM); -} - -static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr, - bool bindless) -{ - LLVMValueRef res; - - enum glsl_sampler_dim dim; - enum gl_access_qualifier access = nir_intrinsic_access(instr); - bool is_array; - if (bindless) { - dim = nir_intrinsic_image_dim(instr); - is_array = nir_intrinsic_image_array(instr); - } else { - const nir_deref_instr *image_deref = get_image_deref(instr); - const struct glsl_type *type = image_deref->type; - const nir_variable *var = nir_deref_instr_get_variable(image_deref); - dim = glsl_get_sampler_dim(type); - access |= var->data.access; - is_array = glsl_sampler_type_is_array(type); - } - - struct waterfall_context wctx; - LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr); - - struct ac_image_args args = {}; - - 
args.cache_policy = get_cache_policy(ctx, access, false, false); - - if (dim == GLSL_SAMPLER_DIM_BUF) { - unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); - unsigned num_channels = util_last_bit(mask); - LLVMValueRef rsrc, vindex; - - rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, false, false); - vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), - ctx->ac.i32_0, ""); - - assert(instr->dest.is_ssa); - bool can_speculate = access & ACCESS_CAN_REORDER; - res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, - ctx->ac.i32_0, num_channels, - args.cache_policy, - can_speculate, - instr->dest.ssa.bit_size == 16); - res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); - - res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); - res = ac_to_integer(&ctx->ac, res); - } else { - bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0; - - args.opcode = level_zero ? ac_image_load : ac_image_load_mip; - args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false); - get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array); - args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array); - if (!level_zero) - args.lod = get_src(ctx, instr->src[3]); - args.dmask = 15; - args.attributes = AC_FUNC_ATTR_READONLY; - - assert(instr->dest.is_ssa); - args.d16 = instr->dest.ssa.bit_size == 16; - - res = ac_build_image_opcode(&ctx->ac, &args); - } - return exit_waterfall(ctx, &wctx, res); -} - -static void visit_image_store(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr, - bool bindless) -{ - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, - ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7003); - } - - enum glsl_sampler_dim dim; - enum gl_access_qualifier access = nir_intrinsic_access(instr); - bool is_array; - - if (bindless) { - dim = nir_intrinsic_image_dim(instr); - 
is_array = nir_intrinsic_image_array(instr); - } else { - const nir_deref_instr *image_deref = get_image_deref(instr); - const struct glsl_type *type = image_deref->type; - const nir_variable *var = nir_deref_instr_get_variable(image_deref); - dim = glsl_get_sampler_dim(type); - access |= var->data.access; - is_array = glsl_sampler_type_is_array(type); - } - - struct waterfall_context wctx; - LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr); - - bool writeonly_memory = access & ACCESS_NON_READABLE; - struct ac_image_args args = {}; - - args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory); - - if (dim == GLSL_SAMPLER_DIM_BUF) { - LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, false); - LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); - unsigned src_channels = ac_get_llvm_num_components(src); - LLVMValueRef vindex; - - if (src_channels == 3) - src = ac_build_expand_to_vec4(&ctx->ac, src, 3); - - vindex = LLVMBuildExtractElement(ctx->ac.builder, - get_src(ctx, instr->src[1]), - ctx->ac.i32_0, ""); - - ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, - ctx->ac.i32_0, args.cache_policy); - } else { - bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0; - - args.opcode = level_zero ? 
ac_image_store : ac_image_store_mip; - args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); - args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true); - get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array); - args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array); - if (!level_zero) - args.lod = get_src(ctx, instr->src[4]); - args.dmask = 15; - args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16; - - ac_build_image_opcode(&ctx->ac, &args); - } - - exit_waterfall(ctx, &wctx, NULL); - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7003); -} - -static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr, - bool bindless) -{ - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, - ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7004); - } - - LLVMValueRef params[7]; - int param_count = 0; - - bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap || - instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap; - const char *atomic_name; - char intrinsic_name[64]; - enum ac_atomic_op atomic_subop; - ASSERTED int length; - - enum glsl_sampler_dim dim; - bool is_array; - if (bindless) { - if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_imin || - instr->intrinsic == nir_intrinsic_bindless_image_atomic_umin || - instr->intrinsic == nir_intrinsic_bindless_image_atomic_imax || - instr->intrinsic == nir_intrinsic_bindless_image_atomic_umax) { - ASSERTED const GLenum format = nir_intrinsic_format(instr); - assert(format == GL_R32UI || format == GL_R32I); - } - dim = nir_intrinsic_image_dim(instr); - is_array = nir_intrinsic_image_array(instr); - } else { - const struct glsl_type *type = get_image_deref(instr)->type; - dim = glsl_get_sampler_dim(type); - is_array = glsl_sampler_type_is_array(type); - } - - struct waterfall_context wctx; - LLVMValueRef dynamic_index 
= enter_waterfall_image(ctx, &wctx, instr); - - switch (instr->intrinsic) { - case nir_intrinsic_bindless_image_atomic_add: - case nir_intrinsic_image_deref_atomic_add: - atomic_name = "add"; - atomic_subop = ac_atomic_add; - break; - case nir_intrinsic_bindless_image_atomic_imin: - case nir_intrinsic_image_deref_atomic_imin: - atomic_name = "smin"; - atomic_subop = ac_atomic_smin; - break; - case nir_intrinsic_bindless_image_atomic_umin: - case nir_intrinsic_image_deref_atomic_umin: - atomic_name = "umin"; - atomic_subop = ac_atomic_umin; - break; - case nir_intrinsic_bindless_image_atomic_imax: - case nir_intrinsic_image_deref_atomic_imax: - atomic_name = "smax"; - atomic_subop = ac_atomic_smax; - break; - case nir_intrinsic_bindless_image_atomic_umax: - case nir_intrinsic_image_deref_atomic_umax: - atomic_name = "umax"; - atomic_subop = ac_atomic_umax; - break; - case nir_intrinsic_bindless_image_atomic_and: - case nir_intrinsic_image_deref_atomic_and: - atomic_name = "and"; - atomic_subop = ac_atomic_and; - break; - case nir_intrinsic_bindless_image_atomic_or: - case nir_intrinsic_image_deref_atomic_or: - atomic_name = "or"; - atomic_subop = ac_atomic_or; - break; - case nir_intrinsic_bindless_image_atomic_xor: - case nir_intrinsic_image_deref_atomic_xor: - atomic_name = "xor"; - atomic_subop = ac_atomic_xor; - break; - case nir_intrinsic_bindless_image_atomic_exchange: - case nir_intrinsic_image_deref_atomic_exchange: - atomic_name = "swap"; - atomic_subop = ac_atomic_swap; - break; - case nir_intrinsic_bindless_image_atomic_comp_swap: - case nir_intrinsic_image_deref_atomic_comp_swap: - atomic_name = "cmpswap"; - atomic_subop = 0; /* not used */ - break; - case nir_intrinsic_bindless_image_atomic_inc_wrap: - case nir_intrinsic_image_deref_atomic_inc_wrap: { - atomic_name = "inc"; - atomic_subop = ac_atomic_inc_wrap; - break; - } - case nir_intrinsic_bindless_image_atomic_dec_wrap: - case nir_intrinsic_image_deref_atomic_dec_wrap: - atomic_name = "dec"; - 
atomic_subop = ac_atomic_dec_wrap; - break; - default: - abort(); - } - - if (cmpswap) - params[param_count++] = get_src(ctx, instr->src[4]); - params[param_count++] = get_src(ctx, instr->src[3]); - - LLVMValueRef result; - if (dim == GLSL_SAMPLER_DIM_BUF) { - params[param_count++] = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, true); - params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), - ctx->ac.i32_0, ""); /* vindex */ - params[param_count++] = ctx->ac.i32_0; /* voffset */ - if (LLVM_VERSION_MAJOR >= 9) { - /* XXX: The new raw/struct atomic intrinsics are buggy - * with LLVM 8, see r358579. - */ - params[param_count++] = ctx->ac.i32_0; /* soffset */ - params[param_count++] = ctx->ac.i32_0; /* slc */ - - length = snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name); - } else { - params[param_count++] = ctx->ac.i1false; /* slc */ - - length = snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.buffer.atomic.%s", atomic_name); - } - - assert(length < sizeof(intrinsic_name)); - result = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, - params, param_count, 0); - } else { - struct ac_image_args args = {}; - args.opcode = cmpswap ? 
ac_image_atomic_cmpswap : ac_image_atomic; - args.atomic = atomic_subop; - args.data[0] = params[0]; - if (cmpswap) - args.data[1] = params[1]; - args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true); - get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array); - args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array); - - result = ac_build_image_opcode(&ctx->ac, &args); - } - - result = exit_waterfall(ctx, &wctx, result); - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7004); - return result; -} - -static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - struct waterfall_context wctx; - LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr); - LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false); - - LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc); - - return exit_waterfall(ctx, &wctx, ret); -} - -static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr, - bool bindless) -{ - LLVMValueRef res; - - enum glsl_sampler_dim dim; - bool is_array; - if (bindless) { - dim = nir_intrinsic_image_dim(instr); - is_array = nir_intrinsic_image_array(instr); - } else { - const struct glsl_type *type = get_image_deref(instr)->type; - dim = glsl_get_sampler_dim(type); - is_array = glsl_sampler_type_is_array(type); - } - - struct waterfall_context wctx; - LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr); - - if (dim == GLSL_SAMPLER_DIM_BUF) { - res = get_buffer_size(ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true); - } else { - - struct ac_image_args args = { 0 }; - - args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array); - args.dmask = 0xf; - args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false); - args.opcode = ac_image_get_resinfo; - args.lod = ctx->ac.i32_0; - 
args.attributes = AC_FUNC_ATTR_READNONE; - - res = ac_build_image_opcode(&ctx->ac, &args); - - LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); - - if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) { - LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); - LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, ""); - z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); - res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, ""); - } - - if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) { - LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, ""); - res = LLVMBuildInsertElement(ctx->ac.builder, res, layers, - ctx->ac.i32_1, ""); - } - } - return exit_waterfall(ctx, &wctx, res); -} - -static void emit_membar(struct ac_llvm_context *ac, - const nir_intrinsic_instr *instr) -{ - unsigned wait_flags = 0; - - switch (instr->intrinsic) { - case nir_intrinsic_memory_barrier: - case nir_intrinsic_group_memory_barrier: - wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE; - break; - case nir_intrinsic_memory_barrier_buffer: - case nir_intrinsic_memory_barrier_image: - wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE; - break; - case nir_intrinsic_memory_barrier_shared: - wait_flags = AC_WAIT_LGKM; - break; - default: - break; - } +static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr, + bool bindless) +{ + LLVMValueRef res; + + enum glsl_sampler_dim dim; + enum gl_access_qualifier access = nir_intrinsic_access(instr); + bool is_array; + if (bindless) { + dim = nir_intrinsic_image_dim(instr); + is_array = nir_intrinsic_image_array(instr); + } else { + const nir_deref_instr *image_deref = get_image_deref(instr); + const struct glsl_type *type = image_deref->type; + const nir_variable *var = nir_deref_instr_get_variable(image_deref); + dim = glsl_get_sampler_dim(type); + access |= var->data.access; + is_array = glsl_sampler_type_is_array(type); + } + + struct 
waterfall_context wctx; + LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr); + + struct ac_image_args args = {}; + + args.cache_policy = get_cache_policy(ctx, access, false, false); + + if (dim == GLSL_SAMPLER_DIM_BUF) { + unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); + unsigned num_channels = util_last_bit(mask); + LLVMValueRef rsrc, vindex; + + rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, false, false); + vindex = + LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); + + assert(instr->dest.is_ssa); + bool can_speculate = access & ACCESS_CAN_REORDER; + res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels, + args.cache_policy, can_speculate, + instr->dest.ssa.bit_size == 16); + res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); + + res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); + res = ac_to_integer(&ctx->ac, res); + } else { + bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0; + + args.opcode = level_zero ? 
ac_image_load : ac_image_load_mip; + args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false); + get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array); + args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array); + if (!level_zero) + args.lod = get_src(ctx, instr->src[3]); + args.dmask = 15; + args.attributes = AC_FUNC_ATTR_READONLY; + + assert(instr->dest.is_ssa); + args.d16 = instr->dest.ssa.bit_size == 16; + + res = ac_build_image_opcode(&ctx->ac, &args); + } + return exit_waterfall(ctx, &wctx, res); +} + +static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr, + bool bindless) +{ + if (ctx->ac.postponed_kill) { + LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, ""); + ac_build_ifcc(&ctx->ac, cond, 7003); + } + + enum glsl_sampler_dim dim; + enum gl_access_qualifier access = nir_intrinsic_access(instr); + bool is_array; + + if (bindless) { + dim = nir_intrinsic_image_dim(instr); + is_array = nir_intrinsic_image_array(instr); + } else { + const nir_deref_instr *image_deref = get_image_deref(instr); + const struct glsl_type *type = image_deref->type; + const nir_variable *var = nir_deref_instr_get_variable(image_deref); + dim = glsl_get_sampler_dim(type); + access |= var->data.access; + is_array = glsl_sampler_type_is_array(type); + } + + struct waterfall_context wctx; + LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr); + + bool writeonly_memory = access & ACCESS_NON_READABLE; + struct ac_image_args args = {}; + + args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory); + + if (dim == GLSL_SAMPLER_DIM_BUF) { + LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, false); + LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); + unsigned src_channels = ac_get_llvm_num_components(src); + LLVMValueRef vindex; + + if (src_channels == 3) + src = ac_build_expand_to_vec4(&ctx->ac, 
src, 3); + + vindex = + LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); + + ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.cache_policy); + } else { + bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0; + + args.opcode = level_zero ? ac_image_store : ac_image_store_mip; + args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); + args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true); + get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array); + args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array); + if (!level_zero) + args.lod = get_src(ctx, instr->src[4]); + args.dmask = 15; + args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16; + + ac_build_image_opcode(&ctx->ac, &args); + } + + exit_waterfall(ctx, &wctx, NULL); + if (ctx->ac.postponed_kill) + ac_build_endif(&ctx->ac, 7003); +} + +static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr, + bool bindless) +{ + if (ctx->ac.postponed_kill) { + LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, ""); + ac_build_ifcc(&ctx->ac, cond, 7004); + } + + LLVMValueRef params[7]; + int param_count = 0; + + bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap || + instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap; + const char *atomic_name; + char intrinsic_name[64]; + enum ac_atomic_op atomic_subop; + ASSERTED int length; + + enum glsl_sampler_dim dim; + bool is_array; + if (bindless) { + if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_imin || + instr->intrinsic == nir_intrinsic_bindless_image_atomic_umin || + instr->intrinsic == nir_intrinsic_bindless_image_atomic_imax || + instr->intrinsic == nir_intrinsic_bindless_image_atomic_umax) { + ASSERTED const GLenum format = nir_intrinsic_format(instr); + assert(format == 
GL_R32UI || format == GL_R32I); + } + dim = nir_intrinsic_image_dim(instr); + is_array = nir_intrinsic_image_array(instr); + } else { + const struct glsl_type *type = get_image_deref(instr)->type; + dim = glsl_get_sampler_dim(type); + is_array = glsl_sampler_type_is_array(type); + } + + struct waterfall_context wctx; + LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr); + + switch (instr->intrinsic) { + case nir_intrinsic_bindless_image_atomic_add: + case nir_intrinsic_image_deref_atomic_add: + atomic_name = "add"; + atomic_subop = ac_atomic_add; + break; + case nir_intrinsic_bindless_image_atomic_imin: + case nir_intrinsic_image_deref_atomic_imin: + atomic_name = "smin"; + atomic_subop = ac_atomic_smin; + break; + case nir_intrinsic_bindless_image_atomic_umin: + case nir_intrinsic_image_deref_atomic_umin: + atomic_name = "umin"; + atomic_subop = ac_atomic_umin; + break; + case nir_intrinsic_bindless_image_atomic_imax: + case nir_intrinsic_image_deref_atomic_imax: + atomic_name = "smax"; + atomic_subop = ac_atomic_smax; + break; + case nir_intrinsic_bindless_image_atomic_umax: + case nir_intrinsic_image_deref_atomic_umax: + atomic_name = "umax"; + atomic_subop = ac_atomic_umax; + break; + case nir_intrinsic_bindless_image_atomic_and: + case nir_intrinsic_image_deref_atomic_and: + atomic_name = "and"; + atomic_subop = ac_atomic_and; + break; + case nir_intrinsic_bindless_image_atomic_or: + case nir_intrinsic_image_deref_atomic_or: + atomic_name = "or"; + atomic_subop = ac_atomic_or; + break; + case nir_intrinsic_bindless_image_atomic_xor: + case nir_intrinsic_image_deref_atomic_xor: + atomic_name = "xor"; + atomic_subop = ac_atomic_xor; + break; + case nir_intrinsic_bindless_image_atomic_exchange: + case nir_intrinsic_image_deref_atomic_exchange: + atomic_name = "swap"; + atomic_subop = ac_atomic_swap; + break; + case nir_intrinsic_bindless_image_atomic_comp_swap: + case nir_intrinsic_image_deref_atomic_comp_swap: + atomic_name = "cmpswap"; + 
atomic_subop = 0; /* not used */ + break; + case nir_intrinsic_bindless_image_atomic_inc_wrap: + case nir_intrinsic_image_deref_atomic_inc_wrap: { + atomic_name = "inc"; + atomic_subop = ac_atomic_inc_wrap; + break; + } + case nir_intrinsic_bindless_image_atomic_dec_wrap: + case nir_intrinsic_image_deref_atomic_dec_wrap: + atomic_name = "dec"; + atomic_subop = ac_atomic_dec_wrap; + break; + default: + abort(); + } + + if (cmpswap) + params[param_count++] = get_src(ctx, instr->src[4]); + params[param_count++] = get_src(ctx, instr->src[3]); + + LLVMValueRef result; + if (dim == GLSL_SAMPLER_DIM_BUF) { + params[param_count++] = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, true); + params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), + ctx->ac.i32_0, ""); /* vindex */ + params[param_count++] = ctx->ac.i32_0; /* voffset */ + if (LLVM_VERSION_MAJOR >= 9) { + /* XXX: The new raw/struct atomic intrinsics are buggy + * with LLVM 8, see r358579. + */ + params[param_count++] = ctx->ac.i32_0; /* soffset */ + params[param_count++] = ctx->ac.i32_0; /* slc */ + + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name); + } else { + params[param_count++] = ctx->ac.i1false; /* slc */ + + length = snprintf(intrinsic_name, sizeof(intrinsic_name), "llvm.amdgcn.buffer.atomic.%s", + atomic_name); + } + + assert(length < sizeof(intrinsic_name)); + result = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0); + } else { + struct ac_image_args args = {}; + args.opcode = cmpswap ? 
ac_image_atomic_cmpswap : ac_image_atomic; + args.atomic = atomic_subop; + args.data[0] = params[0]; + if (cmpswap) + args.data[1] = params[1]; + args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true); + get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array); + args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array); + + result = ac_build_image_opcode(&ctx->ac, &args); + } + + result = exit_waterfall(ctx, &wctx, result); + if (ctx->ac.postponed_kill) + ac_build_endif(&ctx->ac, 7004); + return result; +} + +static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) +{ + struct waterfall_context wctx; + LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr); + LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false); + + LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc); + + return exit_waterfall(ctx, &wctx, ret); +} + +static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr, + bool bindless) +{ + LLVMValueRef res; + + enum glsl_sampler_dim dim; + bool is_array; + if (bindless) { + dim = nir_intrinsic_image_dim(instr); + is_array = nir_intrinsic_image_array(instr); + } else { + const struct glsl_type *type = get_image_deref(instr)->type; + dim = glsl_get_sampler_dim(type); + is_array = glsl_sampler_type_is_array(type); + } + + struct waterfall_context wctx; + LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr); + + if (dim == GLSL_SAMPLER_DIM_BUF) { + res = get_buffer_size( + ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true); + } else { + + struct ac_image_args args = {0}; + + args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array); + args.dmask = 0xf; + args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false); + args.opcode = ac_image_get_resinfo; + args.lod = ctx->ac.i32_0; + args.attributes 
= AC_FUNC_ATTR_READNONE; + + res = ac_build_image_opcode(&ctx->ac, &args); + + LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); + + if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) { + LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); + LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, ""); + z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); + res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, ""); + } + + if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) { + LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, ""); + res = LLVMBuildInsertElement(ctx->ac.builder, res, layers, ctx->ac.i32_1, ""); + } + } + return exit_waterfall(ctx, &wctx, res); +} + +static void emit_membar(struct ac_llvm_context *ac, const nir_intrinsic_instr *instr) +{ + unsigned wait_flags = 0; + + switch (instr->intrinsic) { + case nir_intrinsic_memory_barrier: + case nir_intrinsic_group_memory_barrier: + wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE; + break; + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE; + break; + case nir_intrinsic_memory_barrier_shared: + wait_flags = AC_WAIT_LGKM; + break; + default: + break; + } - ac_build_waitcnt(ac, wait_flags); + ac_build_waitcnt(ac, wait_flags); } void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) { - /* GFX6 only (thanks to a hw bug workaround): - * The real barrier instruction isn’t needed, because an entire patch - * always fits into a single wave. 
- */ - if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) { - ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE); - return; - } - ac_build_s_barrier(ac); -} - -static void emit_discard(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - LLVMValueRef cond; - - if (instr->intrinsic == nir_intrinsic_discard_if) { - cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, - get_src(ctx, instr->src[0]), - ctx->ac.i32_0, ""); - } else { - assert(instr->intrinsic == nir_intrinsic_discard); - cond = ctx->ac.i1false; - } - - ac_build_kill_if_false(&ctx->ac, cond); -} - -static void emit_demote(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - LLVMValueRef cond; - - if (instr->intrinsic == nir_intrinsic_demote_if) { - cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, - get_src(ctx, instr->src[0]), - ctx->ac.i32_0, ""); - } else { - assert(instr->intrinsic == nir_intrinsic_demote); - cond = ctx->ac.i1false; - } - - /* Kill immediately while maintaining WQM. 
*/ - ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond)); - - LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, ""); - mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, ""); - LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill); - return; -} - -static LLVMValueRef -visit_load_local_invocation_index(struct ac_nir_context *ctx) -{ - LLVMValueRef result; - LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac); - result = LLVMBuildAnd(ctx->ac.builder, - ac_get_arg(&ctx->ac, ctx->args->tg_size), - LLVMConstInt(ctx->ac.i32, 0xfc0, false), ""); - - if (ctx->ac.wave_size == 32) - result = LLVMBuildLShr(ctx->ac.builder, result, - LLVMConstInt(ctx->ac.i32, 1, false), ""); - - return LLVMBuildAdd(ctx->ac.builder, result, thread_id, ""); -} - -static LLVMValueRef -visit_load_subgroup_id(struct ac_nir_context *ctx) -{ - if (ctx->stage == MESA_SHADER_COMPUTE) { - LLVMValueRef result; - result = LLVMBuildAnd(ctx->ac.builder, - ac_get_arg(&ctx->ac, ctx->args->tg_size), - LLVMConstInt(ctx->ac.i32, 0xfc0, false), ""); - return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), ""); - } else { - return LLVMConstInt(ctx->ac.i32, 0, false); - } -} - -static LLVMValueRef -visit_load_num_subgroups(struct ac_nir_context *ctx) -{ - if (ctx->stage == MESA_SHADER_COMPUTE) { - return LLVMBuildAnd(ctx->ac.builder, - ac_get_arg(&ctx->ac, ctx->args->tg_size), - LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); - } else { - return LLVMConstInt(ctx->ac.i32, 1, false); - } -} - -static LLVMValueRef -visit_first_invocation(struct ac_nir_context *ctx) -{ - LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1); - const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64"; - - /* The second argument is whether cttz(0) should be defined, but we do not care. 
*/ - LLVMValueRef args[] = {active_set, ctx->ac.i1false}; - LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr, - ctx->ac.iN_wavemask, args, 2, - AC_FUNC_ATTR_NOUNWIND | - AC_FUNC_ATTR_READNONE); - - return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, ""); -} - -static LLVMValueRef -visit_load_shared(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - LLVMValueRef values[4], derived_ptr, index, ret; - - LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], - instr->dest.ssa.bit_size); - - for (int chan = 0; chan < instr->num_components; chan++) { - index = LLVMConstInt(ctx->ac.i32, chan, 0); - derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, ""); - values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, ""); - } - - ret = ac_build_gather_values(&ctx->ac, values, instr->num_components); - return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); -} - -static void -visit_store_shared(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - LLVMValueRef derived_ptr, data,index; - LLVMBuilderRef builder = ctx->ac.builder; - - LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1], - instr->src[0].ssa->bit_size); - LLVMValueRef src = get_src(ctx, instr->src[0]); - - int writemask = nir_intrinsic_write_mask(instr); - for (int chan = 0; chan < 4; chan++) { - if (!(writemask & (1 << chan))) { - continue; - } - data = ac_llvm_extract_elem(&ctx->ac, src, chan); - index = LLVMConstInt(ctx->ac.i32, chan, 0); - derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); - LLVMBuildStore(builder, data, derived_ptr); - } -} - -static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr, - LLVMValueRef ptr, int src_idx) -{ - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, - ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7005); - } - - LLVMValueRef result; - LLVMValueRef src = get_src(ctx, instr->src[src_idx]); - - 
const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup"; - - if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref) { - nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); - if (deref->mode == nir_var_mem_global) { - /* use "singlethread" sync scope to implement relaxed ordering */ - sync_scope = LLVM_VERSION_MAJOR >= 9 ? "singlethread-one-as" : "singlethread"; - - LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(src), LLVMGetPointerAddressSpace(LLVMTypeOf(ptr))); - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type , ""); - } - } - - if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap || - instr->intrinsic == nir_intrinsic_deref_atomic_comp_swap) { - LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]); - result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope); - result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, ""); - } else { - LLVMAtomicRMWBinOp op; - switch (instr->intrinsic) { - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_deref_atomic_add: - op = LLVMAtomicRMWBinOpAdd; - break; - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_deref_atomic_umin: - op = LLVMAtomicRMWBinOpUMin; - break; - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_deref_atomic_umax: - op = LLVMAtomicRMWBinOpUMax; - break; - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_deref_atomic_imin: - op = LLVMAtomicRMWBinOpMin; - break; - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_deref_atomic_imax: - op = LLVMAtomicRMWBinOpMax; - break; - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_deref_atomic_and: - op = LLVMAtomicRMWBinOpAnd; - break; - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_deref_atomic_or: - op = LLVMAtomicRMWBinOpOr; - break; - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_deref_atomic_xor: - op = LLVMAtomicRMWBinOpXor; - break; - case 
nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_deref_atomic_exchange: - op = LLVMAtomicRMWBinOpXchg; - break; + /* GFX6 only (thanks to a hw bug workaround): + * The real barrier instruction isn’t needed, because an entire patch + * always fits into a single wave. + */ + if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) { + ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE); + return; + } + ac_build_s_barrier(ac); +} + +static void emit_discard(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) +{ + LLVMValueRef cond; + + if (instr->intrinsic == nir_intrinsic_discard_if) { + cond = + LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, get_src(ctx, instr->src[0]), ctx->ac.i32_0, ""); + } else { + assert(instr->intrinsic == nir_intrinsic_discard); + cond = ctx->ac.i1false; + } + + ac_build_kill_if_false(&ctx->ac, cond); +} + +static void emit_demote(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) +{ + LLVMValueRef cond; + + if (instr->intrinsic == nir_intrinsic_demote_if) { + cond = + LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, get_src(ctx, instr->src[0]), ctx->ac.i32_0, ""); + } else { + assert(instr->intrinsic == nir_intrinsic_demote); + cond = ctx->ac.i1false; + } + + /* Kill immediately while maintaining WQM. 
*/ + ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond)); + + LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, ""); + mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, ""); + LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill); + return; +} + +static LLVMValueRef visit_load_local_invocation_index(struct ac_nir_context *ctx) +{ + LLVMValueRef result; + LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac); + result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size), + LLVMConstInt(ctx->ac.i32, 0xfc0, false), ""); + + if (ctx->ac.wave_size == 32) + result = LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 1, false), ""); + + return LLVMBuildAdd(ctx->ac.builder, result, thread_id, ""); +} + +static LLVMValueRef visit_load_subgroup_id(struct ac_nir_context *ctx) +{ + if (ctx->stage == MESA_SHADER_COMPUTE) { + LLVMValueRef result; + result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size), + LLVMConstInt(ctx->ac.i32, 0xfc0, false), ""); + return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), ""); + } else { + return LLVMConstInt(ctx->ac.i32, 0, false); + } +} + +static LLVMValueRef visit_load_num_subgroups(struct ac_nir_context *ctx) +{ + if (ctx->stage == MESA_SHADER_COMPUTE) { + return LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size), + LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); + } else { + return LLVMConstInt(ctx->ac.i32, 1, false); + } +} + +static LLVMValueRef visit_first_invocation(struct ac_nir_context *ctx) +{ + LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1); + const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64"; + + /* The second argument is whether cttz(0) should be defined, but we do not care. 
*/ + LLVMValueRef args[] = {active_set, ctx->ac.i1false}; + LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr, ctx->ac.iN_wavemask, args, 2, + AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE); + + return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, ""); +} + +static LLVMValueRef visit_load_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) +{ + LLVMValueRef values[4], derived_ptr, index, ret; + + LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->dest.ssa.bit_size); + + for (int chan = 0; chan < instr->num_components; chan++) { + index = LLVMConstInt(ctx->ac.i32, chan, 0); + derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, ""); + values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, ""); + } + + ret = ac_build_gather_values(&ctx->ac, values, instr->num_components); + return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); +} + +static void visit_store_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) +{ + LLVMValueRef derived_ptr, data, index; + LLVMBuilderRef builder = ctx->ac.builder; + + LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1], instr->src[0].ssa->bit_size); + LLVMValueRef src = get_src(ctx, instr->src[0]); + + int writemask = nir_intrinsic_write_mask(instr); + for (int chan = 0; chan < 4; chan++) { + if (!(writemask & (1 << chan))) { + continue; + } + data = ac_llvm_extract_elem(&ctx->ac, src, chan); + index = LLVMConstInt(ctx->ac.i32, chan, 0); + derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); + LLVMBuildStore(builder, data, derived_ptr); + } +} + +static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr, + LLVMValueRef ptr, int src_idx) +{ + if (ctx->ac.postponed_kill) { + LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, ""); + ac_build_ifcc(&ctx->ac, cond, 7005); + } + + LLVMValueRef result; + LLVMValueRef src = get_src(ctx, instr->src[src_idx]); + + const char 
*sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup"; + + if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref) { + nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); + if (deref->mode == nir_var_mem_global) { + /* use "singlethread" sync scope to implement relaxed ordering */ + sync_scope = LLVM_VERSION_MAJOR >= 9 ? "singlethread-one-as" : "singlethread"; + + LLVMTypeRef ptr_type = + LLVMPointerType(LLVMTypeOf(src), LLVMGetPointerAddressSpace(LLVMTypeOf(ptr))); + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type, ""); + } + } + + if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap || + instr->intrinsic == nir_intrinsic_deref_atomic_comp_swap) { + LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]); + result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope); + result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, ""); + } else { + LLVMAtomicRMWBinOp op; + switch (instr->intrinsic) { + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_deref_atomic_add: + op = LLVMAtomicRMWBinOpAdd; + break; + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_deref_atomic_umin: + op = LLVMAtomicRMWBinOpUMin; + break; + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_deref_atomic_umax: + op = LLVMAtomicRMWBinOpUMax; + break; + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_deref_atomic_imin: + op = LLVMAtomicRMWBinOpMin; + break; + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_deref_atomic_imax: + op = LLVMAtomicRMWBinOpMax; + break; + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_deref_atomic_and: + op = LLVMAtomicRMWBinOpAnd; + break; + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_deref_atomic_or: + op = LLVMAtomicRMWBinOpOr; + break; + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_deref_atomic_xor: + op = LLVMAtomicRMWBinOpXor; + break; + case nir_intrinsic_shared_atomic_exchange: + case 
nir_intrinsic_deref_atomic_exchange: + op = LLVMAtomicRMWBinOpXchg; + break; #if LLVM_VERSION_MAJOR >= 10 - case nir_intrinsic_shared_atomic_fadd: - case nir_intrinsic_deref_atomic_fadd: - op = LLVMAtomicRMWBinOpFAdd; - break; + case nir_intrinsic_shared_atomic_fadd: + case nir_intrinsic_deref_atomic_fadd: + op = LLVMAtomicRMWBinOpFAdd; + break; #endif - default: - return NULL; - } - - LLVMValueRef val; - - if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd || - instr->intrinsic == nir_intrinsic_deref_atomic_fadd) { - val = ac_to_float(&ctx->ac, src); - } else { - val = ac_to_integer(&ctx->ac, src); - } - - result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope); - } - - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7005); - return result; + default: + return NULL; + } + + LLVMValueRef val; + + if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd || + instr->intrinsic == nir_intrinsic_deref_atomic_fadd) { + val = ac_to_float(&ctx->ac, src); + } else { + val = ac_to_integer(&ctx->ac, src); + } + + result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope); + } + + if (ctx->ac.postponed_kill) + ac_build_endif(&ctx->ac, 7005); + return result; } static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx) { - LLVMValueRef values[2]; - LLVMValueRef pos[2]; + LLVMValueRef values[2]; + LLVMValueRef pos[2]; + + pos[0] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[0])); + pos[1] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[1])); + + values[0] = ac_build_fract(&ctx->ac, pos[0], 32); + values[1] = ac_build_fract(&ctx->ac, pos[1], 32); + return ac_build_gather_values(&ctx->ac, values, 2); +} + +static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx, enum glsl_interp_mode interp, + unsigned location) +{ + switch (interp) { + case INTERP_MODE_FLAT: + default: + return NULL; + case INTERP_MODE_SMOOTH: + case INTERP_MODE_NONE: + if (location == INTERP_CENTER) + return ac_get_arg(&ctx->ac, 
ctx->args->persp_center); + else if (location == INTERP_CENTROID) + return ctx->abi->persp_centroid; + else if (location == INTERP_SAMPLE) + return ac_get_arg(&ctx->ac, ctx->args->persp_sample); + break; + case INTERP_MODE_NOPERSPECTIVE: + if (location == INTERP_CENTER) + return ac_get_arg(&ctx->ac, ctx->args->linear_center); + else if (location == INTERP_CENTROID) + return ctx->abi->linear_centroid; + else if (location == INTERP_SAMPLE) + return ac_get_arg(&ctx->ac, ctx->args->linear_sample); + break; + } + return NULL; +} + +static LLVMValueRef barycentric_center(struct ac_nir_context *ctx, unsigned mode) +{ + LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER); + return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, ""); +} + +static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx, unsigned mode, + LLVMValueRef offset) +{ + LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER); + LLVMValueRef src_c0 = + ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_0, "")); + LLVMValueRef src_c1 = + ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_1, "")); - pos[0] = ac_to_float(&ctx->ac, - ac_get_arg(&ctx->ac, ctx->args->frag_pos[0])); - pos[1] = ac_to_float(&ctx->ac, - ac_get_arg(&ctx->ac, ctx->args->frag_pos[1])); - - values[0] = ac_build_fract(&ctx->ac, pos[0], 32); - values[1] = ac_build_fract(&ctx->ac, pos[1], 32); - return ac_build_gather_values(&ctx->ac, values, 2); -} - -static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx, - enum glsl_interp_mode interp, unsigned location) -{ - switch (interp) { - case INTERP_MODE_FLAT: - default: - return NULL; - case INTERP_MODE_SMOOTH: - case INTERP_MODE_NONE: - if (location == INTERP_CENTER) - return ac_get_arg(&ctx->ac, ctx->args->persp_center); - else if (location == INTERP_CENTROID) - return ctx->abi->persp_centroid; - else if (location == INTERP_SAMPLE) - return 
ac_get_arg(&ctx->ac, ctx->args->persp_sample); - break; - case INTERP_MODE_NOPERSPECTIVE: - if (location == INTERP_CENTER) - return ac_get_arg(&ctx->ac, ctx->args->linear_center); - else if (location == INTERP_CENTROID) - return ctx->abi->linear_centroid; - else if (location == INTERP_SAMPLE) - return ac_get_arg(&ctx->ac, ctx->args->linear_sample); - break; - } - return NULL; -} - -static LLVMValueRef barycentric_center(struct ac_nir_context *ctx, - unsigned mode) -{ - LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER); - return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, ""); -} - -static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx, - unsigned mode, - LLVMValueRef offset) -{ - LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER); - LLVMValueRef src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_0, "")); - LLVMValueRef src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_1, "")); - - LLVMValueRef ij_out[2]; - LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param); - - /* - * take the I then J parameters, and the DDX/Y for it, and - * calculate the IJ inputs for the interpolator. 
- * temp1 = ddx * offset/sample.x + I; - * interp_param.I = ddy * offset/sample.y + temp1; - * temp1 = ddx * offset/sample.x + J; - * interp_param.J = ddy * offset/sample.y + temp1; - */ - for (unsigned i = 0; i < 2; i++) { - LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false); - LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false); - LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder, - ddxy_out, ix_ll, ""); - LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder, - ddxy_out, iy_ll, ""); - LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder, - interp_param, ix_ll, ""); - LLVMValueRef temp1, temp2; - - interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el, - ctx->ac.f32, ""); - - temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el); - temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1); - - ij_out[i] = LLVMBuildBitCast(ctx->ac.builder, - temp2, ctx->ac.i32, ""); - } - interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); - return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, ""); -} - -static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx, - unsigned mode) -{ - LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTROID); - return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, ""); -} - -static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx, - unsigned mode, - LLVMValueRef sample_id) -{ - if (ctx->abi->interp_at_sample_force_center) - return barycentric_center(ctx, mode); - - LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f); - - /* fetch sample ID */ - LLVMValueRef sample_pos = ctx->abi->load_sample_position(ctx->abi, sample_id); - - LLVMValueRef src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_0, ""); - src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, ""); - LLVMValueRef src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_1, ""); - src_c1 = 
LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, ""); - LLVMValueRef coords[] = { src_c0, src_c1 }; - LLVMValueRef offset = ac_build_gather_values(&ctx->ac, coords, 2); - - return barycentric_offset(ctx, mode, offset); + LLVMValueRef ij_out[2]; + LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param); + + /* + * take the I then J parameters, and the DDX/Y for it, and + * calculate the IJ inputs for the interpolator. + * temp1 = ddx * offset/sample.x + I; + * interp_param.I = ddy * offset/sample.y + temp1; + * temp1 = ddx * offset/sample.x + J; + * interp_param.J = ddy * offset/sample.y + temp1; + */ + for (unsigned i = 0; i < 2; i++) { + LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false); + LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false); + LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, ix_ll, ""); + LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, iy_ll, ""); + LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ix_ll, ""); + LLVMValueRef temp1, temp2; + + interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el, ctx->ac.f32, ""); + + temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el); + temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1); + + ij_out[i] = LLVMBuildBitCast(ctx->ac.builder, temp2, ctx->ac.i32, ""); + } + interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); + return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, ""); +} + +static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx, unsigned mode) +{ + LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTROID); + return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, ""); } +static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx, unsigned mode, + LLVMValueRef sample_id) +{ + if (ctx->abi->interp_at_sample_force_center) + return barycentric_center(ctx, mode); + + LLVMValueRef halfval = 
LLVMConstReal(ctx->ac.f32, 0.5f); + + /* fetch sample ID */ + LLVMValueRef sample_pos = ctx->abi->load_sample_position(ctx->abi, sample_id); + + LLVMValueRef src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_0, ""); + src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, ""); + LLVMValueRef src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_1, ""); + src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, ""); + LLVMValueRef coords[] = {src_c0, src_c1}; + LLVMValueRef offset = ac_build_gather_values(&ctx->ac, coords, 2); -static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx, - unsigned mode) + return barycentric_offset(ctx, mode, offset); +} + +static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx, unsigned mode) { - LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_SAMPLE); - return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, ""); + LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_SAMPLE); + return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, ""); } static LLVMValueRef barycentric_model(struct ac_nir_context *ctx) { - return LLVMBuildBitCast(ctx->ac.builder, - ac_get_arg(&ctx->ac, ctx->args->pull_model), - ctx->ac.v3i32, ""); -} - -static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx, - LLVMValueRef interp_param, - unsigned index, unsigned comp_start, - unsigned num_components, - unsigned bitsize) -{ - LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false); - LLVMValueRef interp_param_f; - - interp_param_f = LLVMBuildBitCast(ctx->ac.builder, - interp_param, ctx->ac.v2f32, ""); - LLVMValueRef i = LLVMBuildExtractElement( - ctx->ac.builder, interp_param_f, ctx->ac.i32_0, ""); - LLVMValueRef j = LLVMBuildExtractElement( - ctx->ac.builder, interp_param_f, ctx->ac.i32_1, ""); - - /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */ - if (ctx->verified_interp && - 
!_mesa_hash_table_search(ctx->verified_interp, interp_param)) { - LLVMValueRef args[2]; - args[0] = i; - args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false); - LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1, - args, 2, AC_FUNC_ATTR_READNONE); - ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, "")); - _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param); - } - - LLVMValueRef values[4]; - assert(bitsize == 16 || bitsize == 32); - for (unsigned comp = 0; comp < num_components; comp++) { - LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, comp_start + comp, false); - if (bitsize == 16) { - values[comp] = ac_build_fs_interp_f16(&ctx->ac, llvm_chan, attr_number, - ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j); - } else { - values[comp] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number, - ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j); - } - } - - return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components)); -} - -static LLVMValueRef load_input(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - unsigned offset_idx = instr->intrinsic == nir_intrinsic_load_input ? 
0 : 1; - - /* We only lower inputs for fragment shaders ATM */ - ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[offset_idx]); - assert(offset); - assert(offset[0].i32 == 0); - - unsigned component = nir_intrinsic_component(instr); - unsigned index = nir_intrinsic_base(instr); - unsigned vertex_id = 2; /* P0 */ - - if (instr->intrinsic == nir_intrinsic_load_input_vertex) { - nir_const_value *src0 = nir_src_as_const_value(instr->src[0]); - - switch (src0[0].i32) { - case 0: - vertex_id = 2; - break; - case 1: - vertex_id = 0; - break; - case 2: - vertex_id = 1; - break; - default: - unreachable("Invalid vertex index"); - } - } - - LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false); - LLVMValueRef values[8]; - - /* Each component of a 64-bit value takes up two GL-level channels. */ - unsigned num_components = instr->dest.ssa.num_components; - unsigned bit_size = instr->dest.ssa.bit_size; - unsigned channels = - bit_size == 64 ? num_components * 2 : num_components; - - for (unsigned chan = 0; chan < channels; chan++) { - if (component + chan > 4) - attr_number = LLVMConstInt(ctx->ac.i32, index + 1, false); - LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false); - values[chan] = ac_build_fs_interp_mov(&ctx->ac, - LLVMConstInt(ctx->ac.i32, vertex_id, false), - llvm_chan, - attr_number, - ac_get_arg(&ctx->ac, ctx->args->prim_mask)); - values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, ""); - values[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan], - bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, ""); - } - - LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, channels); - if (bit_size == 64) { - LLVMTypeRef type = num_components == 1 ? 
ctx->ac.i64 : - LLVMVectorType(ctx->ac.i64, num_components); - result = LLVMBuildBitCast(ctx->ac.builder, result, type, ""); - } - return result; -} - -static void visit_intrinsic(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - LLVMValueRef result = NULL; - - switch (instr->intrinsic) { - case nir_intrinsic_ballot: - result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0])); - if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size) - result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, ""); - break; - case nir_intrinsic_read_invocation: - result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), - get_src(ctx, instr->src[1])); - break; - case nir_intrinsic_read_first_invocation: - result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL); - break; - case nir_intrinsic_load_subgroup_invocation: - result = ac_get_thread_id(&ctx->ac); - break; - case nir_intrinsic_load_work_group_id: { - LLVMValueRef values[3]; - - for (int i = 0; i < 3; i++) { - values[i] = ctx->args->workgroup_ids[i].used ? 
- ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i]) : ctx->ac.i32_0; - } - - result = ac_build_gather_values(&ctx->ac, values, 3); - break; - } - case nir_intrinsic_load_base_vertex: - case nir_intrinsic_load_first_vertex: - result = ctx->abi->load_base_vertex(ctx->abi); - break; - case nir_intrinsic_load_local_group_size: - result = ctx->abi->load_local_group_size(ctx->abi); - break; - case nir_intrinsic_load_vertex_id: - result = LLVMBuildAdd(ctx->ac.builder, - ac_get_arg(&ctx->ac, ctx->args->vertex_id), - ac_get_arg(&ctx->ac, ctx->args->base_vertex), ""); - break; - case nir_intrinsic_load_vertex_id_zero_base: { - result = ctx->abi->vertex_id; - break; - } - case nir_intrinsic_load_local_invocation_id: { - result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids); - break; - } - case nir_intrinsic_load_base_instance: - result = ac_get_arg(&ctx->ac, ctx->args->start_instance); - break; - case nir_intrinsic_load_draw_id: - result = ac_get_arg(&ctx->ac, ctx->args->draw_id); - break; - case nir_intrinsic_load_view_index: - result = ac_get_arg(&ctx->ac, ctx->args->view_index); - break; - case nir_intrinsic_load_invocation_id: - if (ctx->stage == MESA_SHADER_TESS_CTRL) { - result = ac_unpack_param(&ctx->ac, - ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), - 8, 5); - } else { - if (ctx->ac.chip_class >= GFX10) { - result = LLVMBuildAnd(ctx->ac.builder, - ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id), - LLVMConstInt(ctx->ac.i32, 127, 0), ""); - } else { - result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id); - } - } - break; - case nir_intrinsic_load_primitive_id: - if (ctx->stage == MESA_SHADER_GEOMETRY) { - result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id); - } else if (ctx->stage == MESA_SHADER_TESS_CTRL) { - result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id); - } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { - result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id); - } else - fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage); - 
break; - case nir_intrinsic_load_sample_id: - result = ac_unpack_param(&ctx->ac, - ac_get_arg(&ctx->ac, ctx->args->ancillary), - 8, 4); - break; - case nir_intrinsic_load_sample_pos: - result = load_sample_pos(ctx); - break; - case nir_intrinsic_load_sample_mask_in: - result = ctx->abi->load_sample_mask_in(ctx->abi); - break; - case nir_intrinsic_load_frag_coord: { - LLVMValueRef values[4] = { - ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), - ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]), - ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]), - ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, - ac_get_arg(&ctx->ac, ctx->args->frag_pos[3])) - }; - result = ac_to_integer(&ctx->ac, - ac_build_gather_values(&ctx->ac, values, 4)); - break; - } - case nir_intrinsic_load_layer_id: - result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; - break; - case nir_intrinsic_load_front_face: - result = ac_get_arg(&ctx->ac, ctx->args->front_face); - break; - case nir_intrinsic_load_helper_invocation: - result = ac_build_load_helper_invocation(&ctx->ac); - break; - case nir_intrinsic_is_helper_invocation: - result = ac_build_is_helper_invocation(&ctx->ac); - break; - case nir_intrinsic_load_color0: - result = ctx->abi->color0; - break; - case nir_intrinsic_load_color1: - result = ctx->abi->color1; - break; - case nir_intrinsic_load_user_data_amd: - assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32); - result = ctx->abi->user_data; - break; - case nir_intrinsic_load_instance_id: - result = ctx->abi->instance_id; - break; - case nir_intrinsic_load_num_work_groups: - result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups); - break; - case nir_intrinsic_load_local_invocation_index: - result = visit_load_local_invocation_index(ctx); - break; - case nir_intrinsic_load_subgroup_id: - result = visit_load_subgroup_id(ctx); - break; - case nir_intrinsic_load_num_subgroups: - result = visit_load_num_subgroups(ctx); - break; - case nir_intrinsic_first_invocation: - result = 
visit_first_invocation(ctx); - break; - case nir_intrinsic_load_push_constant: - result = visit_load_push_constant(ctx, instr); - break; - case nir_intrinsic_vulkan_resource_index: { - LLVMValueRef index = get_src(ctx, instr->src[0]); - unsigned desc_set = nir_intrinsic_desc_set(instr); - unsigned binding = nir_intrinsic_binding(instr); - - result = ctx->abi->load_resource(ctx->abi, index, desc_set, - binding); - break; - } - case nir_intrinsic_vulkan_resource_reindex: - result = visit_vulkan_resource_reindex(ctx, instr); - break; - case nir_intrinsic_store_ssbo: - visit_store_ssbo(ctx, instr); - break; - case nir_intrinsic_load_ssbo: - result = visit_load_buffer(ctx, instr); - break; - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: - result = visit_atomic_ssbo(ctx, instr); - break; - case nir_intrinsic_load_ubo: - result = visit_load_ubo_buffer(ctx, instr); - break; - case nir_intrinsic_get_buffer_size: - result = visit_get_buffer_size(ctx, instr); - break; - case nir_intrinsic_load_deref: - result = visit_load_var(ctx, instr); - break; - case nir_intrinsic_store_deref: - visit_store_var(ctx, instr); - break; - case nir_intrinsic_load_shared: - result = visit_load_shared(ctx, instr); - break; - case nir_intrinsic_store_shared: - visit_store_shared(ctx, instr); - break; - case nir_intrinsic_bindless_image_samples: - case nir_intrinsic_image_deref_samples: - result = visit_image_samples(ctx, instr); - break; - case nir_intrinsic_bindless_image_load: - result = visit_image_load(ctx, instr, true); - break; - case nir_intrinsic_image_deref_load: - result = visit_image_load(ctx, instr, false); - break; - case 
nir_intrinsic_bindless_image_store: - visit_image_store(ctx, instr, true); - break; - case nir_intrinsic_image_deref_store: - visit_image_store(ctx, instr, false); - break; - case nir_intrinsic_bindless_image_atomic_add: - case nir_intrinsic_bindless_image_atomic_imin: - case nir_intrinsic_bindless_image_atomic_umin: - case nir_intrinsic_bindless_image_atomic_imax: - case nir_intrinsic_bindless_image_atomic_umax: - case nir_intrinsic_bindless_image_atomic_and: - case nir_intrinsic_bindless_image_atomic_or: - case nir_intrinsic_bindless_image_atomic_xor: - case nir_intrinsic_bindless_image_atomic_exchange: - case nir_intrinsic_bindless_image_atomic_comp_swap: - case nir_intrinsic_bindless_image_atomic_inc_wrap: - case nir_intrinsic_bindless_image_atomic_dec_wrap: - result = visit_image_atomic(ctx, instr, true); - break; - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_imin: - case nir_intrinsic_image_deref_atomic_umin: - case nir_intrinsic_image_deref_atomic_imax: - case nir_intrinsic_image_deref_atomic_umax: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: - case nir_intrinsic_image_deref_atomic_inc_wrap: - case nir_intrinsic_image_deref_atomic_dec_wrap: - result = visit_image_atomic(ctx, instr, false); - break; - case nir_intrinsic_bindless_image_size: - result = visit_image_size(ctx, instr, true); - break; - case nir_intrinsic_image_deref_size: - result = visit_image_size(ctx, instr, false); - break; - case nir_intrinsic_shader_clock: - result = ac_build_shader_clock(&ctx->ac, - nir_intrinsic_memory_scope(instr)); - break; - case nir_intrinsic_discard: - case nir_intrinsic_discard_if: - emit_discard(ctx, instr); - break; - case nir_intrinsic_demote: - case nir_intrinsic_demote_if: - emit_demote(ctx, instr); - break; - case 
nir_intrinsic_memory_barrier: - case nir_intrinsic_group_memory_barrier: - case nir_intrinsic_memory_barrier_buffer: - case nir_intrinsic_memory_barrier_image: - case nir_intrinsic_memory_barrier_shared: - emit_membar(&ctx->ac, instr); - break; - case nir_intrinsic_scoped_barrier: { - assert(!(nir_intrinsic_memory_semantics(instr) & - (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE))); - - nir_variable_mode modes = nir_intrinsic_memory_modes(instr); - - unsigned wait_flags = 0; - if (modes & (nir_var_mem_global | nir_var_mem_ssbo)) - wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE; - if (modes & nir_var_mem_shared) - wait_flags |= AC_WAIT_LGKM; - - if (wait_flags) - ac_build_waitcnt(&ctx->ac, wait_flags); - - if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP) - ac_emit_barrier(&ctx->ac, ctx->stage); - break; - } - case nir_intrinsic_memory_barrier_tcs_patch: - break; - case nir_intrinsic_control_barrier: - ac_emit_barrier(&ctx->ac, ctx->stage); - break; - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: - case nir_intrinsic_shared_atomic_fadd: { - LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], - instr->src[1].ssa->bit_size); - result = visit_var_atomic(ctx, instr, ptr, 1); - break; - } - case nir_intrinsic_deref_atomic_add: - case nir_intrinsic_deref_atomic_imin: - case nir_intrinsic_deref_atomic_umin: - case nir_intrinsic_deref_atomic_imax: - case nir_intrinsic_deref_atomic_umax: - case nir_intrinsic_deref_atomic_and: - case nir_intrinsic_deref_atomic_or: - case nir_intrinsic_deref_atomic_xor: - case nir_intrinsic_deref_atomic_exchange: - case nir_intrinsic_deref_atomic_comp_swap: - 
case nir_intrinsic_deref_atomic_fadd: { - LLVMValueRef ptr = get_src(ctx, instr->src[0]); - result = visit_var_atomic(ctx, instr, ptr, 1); - break; - } - case nir_intrinsic_load_barycentric_pixel: - result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr)); - break; - case nir_intrinsic_load_barycentric_centroid: - result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr)); - break; - case nir_intrinsic_load_barycentric_sample: - result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr)); - break; - case nir_intrinsic_load_barycentric_model: - result = barycentric_model(ctx); - break; - case nir_intrinsic_load_barycentric_at_offset: { - LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); - result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset); - break; - } - case nir_intrinsic_load_barycentric_at_sample: { - LLVMValueRef sample_id = get_src(ctx, instr->src[0]); - result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id); - break; - } - case nir_intrinsic_load_interpolated_input: { - /* We assume any indirect loads have been lowered away */ - ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]); - assert(offset); - assert(offset[0].i32 == 0); - - LLVMValueRef interp_param = get_src(ctx, instr->src[0]); - unsigned index = nir_intrinsic_base(instr); - unsigned component = nir_intrinsic_component(instr); - result = load_interpolated_input(ctx, interp_param, index, - component, - instr->dest.ssa.num_components, - instr->dest.ssa.bit_size); - break; - } - case nir_intrinsic_load_input: - case nir_intrinsic_load_input_vertex: - result = load_input(ctx, instr); - break; - case nir_intrinsic_emit_vertex: - ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs); - break; - case nir_intrinsic_emit_vertex_with_counter: { - unsigned stream = nir_intrinsic_stream_id(instr); - LLVMValueRef next_vertex = get_src(ctx, instr->src[0]); - 
ctx->abi->emit_vertex_with_counter(ctx->abi, stream, - next_vertex, - ctx->abi->outputs); - break; - } - case nir_intrinsic_end_primitive: - case nir_intrinsic_end_primitive_with_counter: - ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr)); - break; - case nir_intrinsic_load_tess_coord: - result = ctx->abi->load_tess_coord(ctx->abi); - break; - case nir_intrinsic_load_tess_level_outer: - result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false); - break; - case nir_intrinsic_load_tess_level_inner: - result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false); - break; - case nir_intrinsic_load_tess_level_outer_default: - result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true); - break; - case nir_intrinsic_load_tess_level_inner_default: - result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true); - break; - case nir_intrinsic_load_patch_vertices_in: - result = ctx->abi->load_patch_vertices_in(ctx->abi); - break; - case nir_intrinsic_vote_all: { - LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0])); - result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); - break; - } - case nir_intrinsic_vote_any: { - LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0])); - result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); - break; - } - case nir_intrinsic_shuffle: - if (ctx->ac.chip_class == GFX8 || - ctx->ac.chip_class == GFX9 || - (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) { - result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), - get_src(ctx, instr->src[1])); - } else { - LLVMValueRef src = get_src(ctx, instr->src[0]); - LLVMValueRef index = get_src(ctx, instr->src[1]); - LLVMTypeRef type = LLVMTypeOf(src); - struct waterfall_context wctx; - LLVMValueRef index_val; - - index_val = enter_waterfall(ctx, &wctx, index, true); - - src = LLVMBuildZExt(ctx->ac.builder, src, - 
ctx->ac.i32, ""); - - result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", - ctx->ac.i32, - (LLVMValueRef []) { src, index_val }, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - - result = LLVMBuildTrunc(ctx->ac.builder, result, type, ""); - - result = exit_waterfall(ctx, &wctx, result); - } - break; - case nir_intrinsic_reduce: - result = ac_build_reduce(&ctx->ac, - get_src(ctx, instr->src[0]), - instr->const_index[0], - instr->const_index[1]); - break; - case nir_intrinsic_inclusive_scan: - result = ac_build_inclusive_scan(&ctx->ac, - get_src(ctx, instr->src[0]), - instr->const_index[0]); - break; - case nir_intrinsic_exclusive_scan: - result = ac_build_exclusive_scan(&ctx->ac, - get_src(ctx, instr->src[0]), - instr->const_index[0]); - break; - case nir_intrinsic_quad_broadcast: { - unsigned lane = nir_src_as_uint(instr->src[1]); - result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), - lane, lane, lane, lane); - break; - } - case nir_intrinsic_quad_swap_horizontal: - result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3 ,2); - break; - case nir_intrinsic_quad_swap_vertical: - result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0 ,1); - break; - case nir_intrinsic_quad_swap_diagonal: - result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1 ,0); - break; - case nir_intrinsic_quad_swizzle_amd: { - uint32_t mask = nir_intrinsic_swizzle_mask(instr); - result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), - mask & 0x3, (mask >> 2) & 0x3, - (mask >> 4) & 0x3, (mask >> 6) & 0x3); - break; - } - case nir_intrinsic_masked_swizzle_amd: { - uint32_t mask = nir_intrinsic_swizzle_mask(instr); - result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask); - break; - } - case nir_intrinsic_write_invocation_amd: - result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]), - get_src(ctx, instr->src[1]), - get_src(ctx, instr->src[2])); - 
break; - case nir_intrinsic_mbcnt_amd: - result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0])); - break; - case nir_intrinsic_load_scratch: { - LLVMValueRef offset = get_src(ctx, instr->src[0]); - LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, - offset); - LLVMTypeRef comp_type = - LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); - LLVMTypeRef vec_type = - instr->dest.ssa.num_components == 1 ? comp_type : - LLVMVectorType(comp_type, instr->dest.ssa.num_components); - unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, - LLVMPointerType(vec_type, addr_space), ""); - result = LLVMBuildLoad(ctx->ac.builder, ptr, ""); - break; - } - case nir_intrinsic_store_scratch: { - LLVMValueRef offset = get_src(ctx, instr->src[1]); - LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, - offset); - LLVMTypeRef comp_type = - LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size); - unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, - LLVMPointerType(comp_type, addr_space), ""); - LLVMValueRef src = get_src(ctx, instr->src[0]); - unsigned wrmask = nir_intrinsic_write_mask(instr); - while (wrmask) { - int start, count; - u_bit_scan_consecutive_range(&wrmask, &start, &count); - - LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false); - LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, ""); - LLVMTypeRef vec_type = - count == 1 ? 
comp_type : LLVMVectorType(comp_type, count); - offset_ptr = LLVMBuildBitCast(ctx->ac.builder, - offset_ptr, - LLVMPointerType(vec_type, addr_space), - ""); - LLVMValueRef offset_src = - ac_extract_components(&ctx->ac, src, start, count); - LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr); - } - break; - } - case nir_intrinsic_load_constant: { - unsigned base = nir_intrinsic_base(instr); - unsigned range = nir_intrinsic_range(instr); - - LLVMValueRef offset = get_src(ctx, instr->src[0]); - offset = LLVMBuildAdd(ctx->ac.builder, offset, - LLVMConstInt(ctx->ac.i32, base, false), ""); - - /* Clamp the offset to avoid out-of-bound access because global - * instructions can't handle them. - */ - LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false); - LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, - offset, size, ""); - offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, ""); - - LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data, - offset); - LLVMTypeRef comp_type = - LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); - LLVMTypeRef vec_type = - instr->dest.ssa.num_components == 1 ? 
comp_type : - LLVMVectorType(comp_type, instr->dest.ssa.num_components); - unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, - LLVMPointerType(vec_type, addr_space), ""); - result = LLVMBuildLoad(ctx->ac.builder, ptr, ""); - break; - } - default: - fprintf(stderr, "Unknown intrinsic: "); - nir_print_instr(&instr->instr, stderr); - fprintf(stderr, "\n"); - break; - } - if (result) { - ctx->ssa_defs[instr->dest.ssa.index] = result; - } -} - -static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx, - unsigned base_index, - unsigned constant_index, - LLVMValueRef dynamic_index) -{ - LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0); - LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index, - LLVMConstInt(ctx->ac.i32, constant_index, 0), ""); - - /* Bindless uniforms are 64bit so multiple index by 8 */ - index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), ""); - offset = LLVMBuildAdd(ctx->ac.builder, offset, index, ""); + return LLVMBuildBitCast(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->pull_model), + ctx->ac.v3i32, ""); +} + +static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx, LLVMValueRef interp_param, + unsigned index, unsigned comp_start, + unsigned num_components, unsigned bitsize) +{ + LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false); + LLVMValueRef interp_param_f; + + interp_param_f = LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2f32, ""); + LLVMValueRef i = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_0, ""); + LLVMValueRef j = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_1, ""); + + /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */ + if (ctx->verified_interp && !_mesa_hash_table_search(ctx->verified_interp, interp_param)) { + LLVMValueRef args[2]; + args[0] = i; + args[1] = 
LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false); + LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1, args, 2, + AC_FUNC_ATTR_READNONE); + ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, "")); + _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param); + } + + LLVMValueRef values[4]; + assert(bitsize == 16 || bitsize == 32); + for (unsigned comp = 0; comp < num_components; comp++) { + LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, comp_start + comp, false); + if (bitsize == 16) { + values[comp] = ac_build_fs_interp_f16(&ctx->ac, llvm_chan, attr_number, + ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j); + } else { + values[comp] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number, + ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j); + } + } + + return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components)); +} + +static LLVMValueRef load_input(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) +{ + unsigned offset_idx = instr->intrinsic == nir_intrinsic_load_input ? 0 : 1; + + /* We only lower inputs for fragment shaders ATM */ + ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[offset_idx]); + assert(offset); + assert(offset[0].i32 == 0); + + unsigned component = nir_intrinsic_component(instr); + unsigned index = nir_intrinsic_base(instr); + unsigned vertex_id = 2; /* P0 */ + + if (instr->intrinsic == nir_intrinsic_load_input_vertex) { + nir_const_value *src0 = nir_src_as_const_value(instr->src[0]); + + switch (src0[0].i32) { + case 0: + vertex_id = 2; + break; + case 1: + vertex_id = 0; + break; + case 2: + vertex_id = 1; + break; + default: + unreachable("Invalid vertex index"); + } + } + + LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false); + LLVMValueRef values[8]; + + /* Each component of a 64-bit value takes up two GL-level channels. 
*/ + unsigned num_components = instr->dest.ssa.num_components; + unsigned bit_size = instr->dest.ssa.bit_size; + unsigned channels = bit_size == 64 ? num_components * 2 : num_components; + + for (unsigned chan = 0; chan < channels; chan++) { + if (component + chan > 4) + attr_number = LLVMConstInt(ctx->ac.i32, index + 1, false); + LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false); + values[chan] = + ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, vertex_id, false), llvm_chan, + attr_number, ac_get_arg(&ctx->ac, ctx->args->prim_mask)); + values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, ""); + values[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan], + bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, ""); + } + + LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, channels); + if (bit_size == 64) { + LLVMTypeRef type = + num_components == 1 ? ctx->ac.i64 : LLVMVectorType(ctx->ac.i64, num_components); + result = LLVMBuildBitCast(ctx->ac.builder, result, type, ""); + } + return result; +} + +static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) +{ + LLVMValueRef result = NULL; + + switch (instr->intrinsic) { + case nir_intrinsic_ballot: + result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0])); + if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size) + result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, ""); + break; + case nir_intrinsic_read_invocation: + result = + ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1])); + break; + case nir_intrinsic_read_first_invocation: + result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL); + break; + case nir_intrinsic_load_subgroup_invocation: + result = ac_get_thread_id(&ctx->ac); + break; + case nir_intrinsic_load_work_group_id: { + LLVMValueRef values[3]; + + for (int i = 0; i < 3; i++) { + values[i] = ctx->args->workgroup_ids[i].used + ? 
ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i]) + : ctx->ac.i32_0; + } + + result = ac_build_gather_values(&ctx->ac, values, 3); + break; + } + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_first_vertex: + result = ctx->abi->load_base_vertex(ctx->abi); + break; + case nir_intrinsic_load_local_group_size: + result = ctx->abi->load_local_group_size(ctx->abi); + break; + case nir_intrinsic_load_vertex_id: + result = LLVMBuildAdd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->vertex_id), + ac_get_arg(&ctx->ac, ctx->args->base_vertex), ""); + break; + case nir_intrinsic_load_vertex_id_zero_base: { + result = ctx->abi->vertex_id; + break; + } + case nir_intrinsic_load_local_invocation_id: { + result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids); + break; + } + case nir_intrinsic_load_base_instance: + result = ac_get_arg(&ctx->ac, ctx->args->start_instance); + break; + case nir_intrinsic_load_draw_id: + result = ac_get_arg(&ctx->ac, ctx->args->draw_id); + break; + case nir_intrinsic_load_view_index: + result = ac_get_arg(&ctx->ac, ctx->args->view_index); + break; + case nir_intrinsic_load_invocation_id: + if (ctx->stage == MESA_SHADER_TESS_CTRL) { + result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5); + } else { + if (ctx->ac.chip_class >= GFX10) { + result = + LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id), + LLVMConstInt(ctx->ac.i32, 127, 0), ""); + } else { + result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id); + } + } + break; + case nir_intrinsic_load_primitive_id: + if (ctx->stage == MESA_SHADER_GEOMETRY) { + result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id); + } else if (ctx->stage == MESA_SHADER_TESS_CTRL) { + result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id); + } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { + result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id); + } else + fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage); + 
break; + case nir_intrinsic_load_sample_id: + result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4); + break; + case nir_intrinsic_load_sample_pos: + result = load_sample_pos(ctx); + break; + case nir_intrinsic_load_sample_mask_in: + result = ctx->abi->load_sample_mask_in(ctx->abi); + break; + case nir_intrinsic_load_frag_coord: { + LLVMValueRef values[4] = { + ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]), + ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]), + ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))}; + result = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4)); + break; + } + case nir_intrinsic_load_layer_id: + result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; + break; + case nir_intrinsic_load_front_face: + result = ac_get_arg(&ctx->ac, ctx->args->front_face); + break; + case nir_intrinsic_load_helper_invocation: + result = ac_build_load_helper_invocation(&ctx->ac); + break; + case nir_intrinsic_is_helper_invocation: + result = ac_build_is_helper_invocation(&ctx->ac); + break; + case nir_intrinsic_load_color0: + result = ctx->abi->color0; + break; + case nir_intrinsic_load_color1: + result = ctx->abi->color1; + break; + case nir_intrinsic_load_user_data_amd: + assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32); + result = ctx->abi->user_data; + break; + case nir_intrinsic_load_instance_id: + result = ctx->abi->instance_id; + break; + case nir_intrinsic_load_num_work_groups: + result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups); + break; + case nir_intrinsic_load_local_invocation_index: + result = visit_load_local_invocation_index(ctx); + break; + case nir_intrinsic_load_subgroup_id: + result = visit_load_subgroup_id(ctx); + break; + case nir_intrinsic_load_num_subgroups: + result = visit_load_num_subgroups(ctx); + break; + case nir_intrinsic_first_invocation: + result = 
visit_first_invocation(ctx); + break; + case nir_intrinsic_load_push_constant: + result = visit_load_push_constant(ctx, instr); + break; + case nir_intrinsic_vulkan_resource_index: { + LLVMValueRef index = get_src(ctx, instr->src[0]); + unsigned desc_set = nir_intrinsic_desc_set(instr); + unsigned binding = nir_intrinsic_binding(instr); + + result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding); + break; + } + case nir_intrinsic_vulkan_resource_reindex: + result = visit_vulkan_resource_reindex(ctx, instr); + break; + case nir_intrinsic_store_ssbo: + visit_store_ssbo(ctx, instr); + break; + case nir_intrinsic_load_ssbo: + result = visit_load_buffer(ctx, instr); + break; + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + result = visit_atomic_ssbo(ctx, instr); + break; + case nir_intrinsic_load_ubo: + result = visit_load_ubo_buffer(ctx, instr); + break; + case nir_intrinsic_get_buffer_size: + result = visit_get_buffer_size(ctx, instr); + break; + case nir_intrinsic_load_deref: + result = visit_load_var(ctx, instr); + break; + case nir_intrinsic_store_deref: + visit_store_var(ctx, instr); + break; + case nir_intrinsic_load_shared: + result = visit_load_shared(ctx, instr); + break; + case nir_intrinsic_store_shared: + visit_store_shared(ctx, instr); + break; + case nir_intrinsic_bindless_image_samples: + case nir_intrinsic_image_deref_samples: + result = visit_image_samples(ctx, instr); + break; + case nir_intrinsic_bindless_image_load: + result = visit_image_load(ctx, instr, true); + break; + case nir_intrinsic_image_deref_load: + result = visit_image_load(ctx, instr, false); + break; + case 
nir_intrinsic_bindless_image_store: + visit_image_store(ctx, instr, true); + break; + case nir_intrinsic_image_deref_store: + visit_image_store(ctx, instr, false); + break; + case nir_intrinsic_bindless_image_atomic_add: + case nir_intrinsic_bindless_image_atomic_imin: + case nir_intrinsic_bindless_image_atomic_umin: + case nir_intrinsic_bindless_image_atomic_imax: + case nir_intrinsic_bindless_image_atomic_umax: + case nir_intrinsic_bindless_image_atomic_and: + case nir_intrinsic_bindless_image_atomic_or: + case nir_intrinsic_bindless_image_atomic_xor: + case nir_intrinsic_bindless_image_atomic_exchange: + case nir_intrinsic_bindless_image_atomic_comp_swap: + case nir_intrinsic_bindless_image_atomic_inc_wrap: + case nir_intrinsic_bindless_image_atomic_dec_wrap: + result = visit_image_atomic(ctx, instr, true); + break; + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_imin: + case nir_intrinsic_image_deref_atomic_umin: + case nir_intrinsic_image_deref_atomic_imax: + case nir_intrinsic_image_deref_atomic_umax: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_atomic_inc_wrap: + case nir_intrinsic_image_deref_atomic_dec_wrap: + result = visit_image_atomic(ctx, instr, false); + break; + case nir_intrinsic_bindless_image_size: + result = visit_image_size(ctx, instr, true); + break; + case nir_intrinsic_image_deref_size: + result = visit_image_size(ctx, instr, false); + break; + case nir_intrinsic_shader_clock: + result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr)); + break; + case nir_intrinsic_discard: + case nir_intrinsic_discard_if: + emit_discard(ctx, instr); + break; + case nir_intrinsic_demote: + case nir_intrinsic_demote_if: + emit_demote(ctx, instr); + break; + case 
nir_intrinsic_memory_barrier: + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: + emit_membar(&ctx->ac, instr); + break; + case nir_intrinsic_scoped_barrier: { + assert(!(nir_intrinsic_memory_semantics(instr) & + (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE))); + + nir_variable_mode modes = nir_intrinsic_memory_modes(instr); + + unsigned wait_flags = 0; + if (modes & (nir_var_mem_global | nir_var_mem_ssbo)) + wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE; + if (modes & nir_var_mem_shared) + wait_flags |= AC_WAIT_LGKM; + + if (wait_flags) + ac_build_waitcnt(&ctx->ac, wait_flags); + + if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP) + ac_emit_barrier(&ctx->ac, ctx->stage); + break; + } + case nir_intrinsic_memory_barrier_tcs_patch: + break; + case nir_intrinsic_control_barrier: + ac_emit_barrier(&ctx->ac, ctx->stage); + break; + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_shared_atomic_fadd: { + LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size); + result = visit_var_atomic(ctx, instr, ptr, 1); + break; + } + case nir_intrinsic_deref_atomic_add: + case nir_intrinsic_deref_atomic_imin: + case nir_intrinsic_deref_atomic_umin: + case nir_intrinsic_deref_atomic_imax: + case nir_intrinsic_deref_atomic_umax: + case nir_intrinsic_deref_atomic_and: + case nir_intrinsic_deref_atomic_or: + case nir_intrinsic_deref_atomic_xor: + case nir_intrinsic_deref_atomic_exchange: + case nir_intrinsic_deref_atomic_comp_swap: + case 
nir_intrinsic_deref_atomic_fadd: { + LLVMValueRef ptr = get_src(ctx, instr->src[0]); + result = visit_var_atomic(ctx, instr, ptr, 1); + break; + } + case nir_intrinsic_load_barycentric_pixel: + result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr)); + break; + case nir_intrinsic_load_barycentric_centroid: + result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr)); + break; + case nir_intrinsic_load_barycentric_sample: + result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr)); + break; + case nir_intrinsic_load_barycentric_model: + result = barycentric_model(ctx); + break; + case nir_intrinsic_load_barycentric_at_offset: { + LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); + result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset); + break; + } + case nir_intrinsic_load_barycentric_at_sample: { + LLVMValueRef sample_id = get_src(ctx, instr->src[0]); + result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id); + break; + } + case nir_intrinsic_load_interpolated_input: { + /* We assume any indirect loads have been lowered away */ + ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]); + assert(offset); + assert(offset[0].i32 == 0); + + LLVMValueRef interp_param = get_src(ctx, instr->src[0]); + unsigned index = nir_intrinsic_base(instr); + unsigned component = nir_intrinsic_component(instr); + result = load_interpolated_input(ctx, interp_param, index, component, + instr->dest.ssa.num_components, instr->dest.ssa.bit_size); + break; + } + case nir_intrinsic_load_input: + case nir_intrinsic_load_input_vertex: + result = load_input(ctx, instr); + break; + case nir_intrinsic_emit_vertex: + ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs); + break; + case nir_intrinsic_emit_vertex_with_counter: { + unsigned stream = nir_intrinsic_stream_id(instr); + LLVMValueRef next_vertex = get_src(ctx, instr->src[0]); + 
ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs); + break; + } + case nir_intrinsic_end_primitive: + case nir_intrinsic_end_primitive_with_counter: + ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr)); + break; + case nir_intrinsic_load_tess_coord: + result = ctx->abi->load_tess_coord(ctx->abi); + break; + case nir_intrinsic_load_tess_level_outer: + result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false); + break; + case nir_intrinsic_load_tess_level_inner: + result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false); + break; + case nir_intrinsic_load_tess_level_outer_default: + result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true); + break; + case nir_intrinsic_load_tess_level_inner_default: + result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true); + break; + case nir_intrinsic_load_patch_vertices_in: + result = ctx->abi->load_patch_vertices_in(ctx->abi); + break; + case nir_intrinsic_vote_all: { + LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0])); + result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); + break; + } + case nir_intrinsic_vote_any: { + LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0])); + result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); + break; + } + case nir_intrinsic_shuffle: + if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 || + (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) { + result = + ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1])); + } else { + LLVMValueRef src = get_src(ctx, instr->src[0]); + LLVMValueRef index = get_src(ctx, instr->src[1]); + LLVMTypeRef type = LLVMTypeOf(src); + struct waterfall_context wctx; + LLVMValueRef index_val; + + index_val = enter_waterfall(ctx, &wctx, index, true); + + src = LLVMBuildZExt(ctx->ac.builder, src, 
ctx->ac.i32, ""); + + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32, + (LLVMValueRef[]){src, index_val}, 2, + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); + + result = LLVMBuildTrunc(ctx->ac.builder, result, type, ""); + + result = exit_waterfall(ctx, &wctx, result); + } + break; + case nir_intrinsic_reduce: + result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0], + instr->const_index[1]); + break; + case nir_intrinsic_inclusive_scan: + result = + ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]); + break; + case nir_intrinsic_exclusive_scan: + result = + ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]); + break; + case nir_intrinsic_quad_broadcast: { + unsigned lane = nir_src_as_uint(instr->src[1]); + result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane); + break; + } + case nir_intrinsic_quad_swap_horizontal: + result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2); + break; + case nir_intrinsic_quad_swap_vertical: + result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1); + break; + case nir_intrinsic_quad_swap_diagonal: + result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0); + break; + case nir_intrinsic_quad_swizzle_amd: { + uint32_t mask = nir_intrinsic_swizzle_mask(instr); + result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3, + (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3); + break; + } + case nir_intrinsic_masked_swizzle_amd: { + uint32_t mask = nir_intrinsic_swizzle_mask(instr); + result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask); + break; + } + case nir_intrinsic_write_invocation_amd: + result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]), + get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2])); + break; + case 
nir_intrinsic_mbcnt_amd: + result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0])); + break; + case nir_intrinsic_load_scratch: { + LLVMValueRef offset = get_src(ctx, instr->src[0]); + LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset); + LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); + LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1 + ? comp_type + : LLVMVectorType(comp_type, instr->dest.ssa.num_components); + unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), ""); + result = LLVMBuildLoad(ctx->ac.builder, ptr, ""); + break; + } + case nir_intrinsic_store_scratch: { + LLVMValueRef offset = get_src(ctx, instr->src[1]); + LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset); + LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size); + unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(comp_type, addr_space), ""); + LLVMValueRef src = get_src(ctx, instr->src[0]); + unsigned wrmask = nir_intrinsic_write_mask(instr); + while (wrmask) { + int start, count; + u_bit_scan_consecutive_range(&wrmask, &start, &count); + + LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false); + LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, ""); + LLVMTypeRef vec_type = count == 1 ? 
comp_type : LLVMVectorType(comp_type, count); + offset_ptr = LLVMBuildBitCast(ctx->ac.builder, offset_ptr, + LLVMPointerType(vec_type, addr_space), ""); + LLVMValueRef offset_src = ac_extract_components(&ctx->ac, src, start, count); + LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr); + } + break; + } + case nir_intrinsic_load_constant: { + unsigned base = nir_intrinsic_base(instr); + unsigned range = nir_intrinsic_range(instr); + + LLVMValueRef offset = get_src(ctx, instr->src[0]); + offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, base, false), ""); + + /* Clamp the offset to avoid out-of-bound access because global + * instructions can't handle them. + */ + LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false); + LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, ""); + offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, ""); + + LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data, offset); + LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); + LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1 + ? 
comp_type + : LLVMVectorType(comp_type, instr->dest.ssa.num_components); + unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), ""); + result = LLVMBuildLoad(ctx->ac.builder, ptr, ""); + break; + } + default: + fprintf(stderr, "Unknown intrinsic: "); + nir_print_instr(&instr->instr, stderr); + fprintf(stderr, "\n"); + break; + } + if (result) { + ctx->ssa_defs[instr->dest.ssa.index] = result; + } +} + +static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx, unsigned base_index, + unsigned constant_index, + LLVMValueRef dynamic_index) +{ + LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0); + LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index, + LLVMConstInt(ctx->ac.i32, constant_index, 0), ""); + + /* Bindless uniforms are 64bit so multiple index by 8 */ + index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), ""); + offset = LLVMBuildAdd(ctx->ac.builder, offset, index, ""); - LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0); + LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0); - LLVMValueRef ret = ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset, - NULL, 0, 0, true, true); + LLVMValueRef ret = + ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset, NULL, 0, 0, true, true); - return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, ""); + return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, ""); } struct sampler_desc_address { - unsigned descriptor_set; - unsigned base_index; /* binding in vulkan */ - unsigned constant_index; - LLVMValueRef dynamic_index; - bool image; - bool bindless; + unsigned descriptor_set; + unsigned base_index; /* binding in vulkan */ + unsigned constant_index; + LLVMValueRef dynamic_index; + bool image; + bool bindless; }; -static struct sampler_desc_address -get_sampler_desc_internal(struct ac_nir_context 
*ctx, - nir_deref_instr *deref_instr, - const nir_instr *instr, - bool image) -{ - LLVMValueRef index = NULL; - unsigned constant_index = 0; - unsigned descriptor_set; - unsigned base_index; - bool bindless = false; - - if (!deref_instr) { - descriptor_set = 0; - if (image) { - nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr); - base_index = 0; - bindless = true; - index = get_src(ctx, img_instr->src[0]); - } else { - nir_tex_instr *tex_instr = nir_instr_as_tex(instr); - int sampSrcIdx = nir_tex_instr_src_index(tex_instr, - nir_tex_src_sampler_handle); - if (sampSrcIdx != -1) { - base_index = 0; - bindless = true; - index = get_src(ctx, tex_instr->src[sampSrcIdx].src); - } else { - assert(tex_instr && !image); - base_index = tex_instr->sampler_index; - } - } - } else { - while(deref_instr->deref_type != nir_deref_type_var) { - if (deref_instr->deref_type == nir_deref_type_array) { - unsigned array_size = glsl_get_aoa_size(deref_instr->type); - if (!array_size) - array_size = 1; - - if (nir_src_is_const(deref_instr->arr.index)) { - constant_index += array_size * nir_src_as_uint(deref_instr->arr.index); - } else { - LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index); - - indirect = LLVMBuildMul(ctx->ac.builder, indirect, - LLVMConstInt(ctx->ac.i32, array_size, false), ""); - - if (!index) - index = indirect; - else - index = LLVMBuildAdd(ctx->ac.builder, index, indirect, ""); - } - - deref_instr = nir_src_as_deref(deref_instr->parent); - } else if (deref_instr->deref_type == nir_deref_type_struct) { - unsigned sidx = deref_instr->strct.index; - deref_instr = nir_src_as_deref(deref_instr->parent); - constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx); - } else { - unreachable("Unsupported deref type"); - } - } - descriptor_set = deref_instr->var->data.descriptor_set; - - if (deref_instr->var->data.bindless) { - /* For now just assert on unhandled variable types */ - assert(deref_instr->var->data.mode == nir_var_uniform); 
- - base_index = deref_instr->var->data.driver_location; - bindless = true; - - index = index ? index : ctx->ac.i32_0; - index = get_bindless_index_from_uniform(ctx, base_index, - constant_index, index); - } else - base_index = deref_instr->var->data.binding; - } - return (struct sampler_desc_address) { - .descriptor_set = descriptor_set, - .base_index = base_index, - .constant_index = constant_index, - .dynamic_index = index, - .image = image, - .bindless = bindless, - }; +static struct sampler_desc_address get_sampler_desc_internal(struct ac_nir_context *ctx, + nir_deref_instr *deref_instr, + const nir_instr *instr, bool image) +{ + LLVMValueRef index = NULL; + unsigned constant_index = 0; + unsigned descriptor_set; + unsigned base_index; + bool bindless = false; + + if (!deref_instr) { + descriptor_set = 0; + if (image) { + nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr); + base_index = 0; + bindless = true; + index = get_src(ctx, img_instr->src[0]); + } else { + nir_tex_instr *tex_instr = nir_instr_as_tex(instr); + int sampSrcIdx = nir_tex_instr_src_index(tex_instr, nir_tex_src_sampler_handle); + if (sampSrcIdx != -1) { + base_index = 0; + bindless = true; + index = get_src(ctx, tex_instr->src[sampSrcIdx].src); + } else { + assert(tex_instr && !image); + base_index = tex_instr->sampler_index; + } + } + } else { + while (deref_instr->deref_type != nir_deref_type_var) { + if (deref_instr->deref_type == nir_deref_type_array) { + unsigned array_size = glsl_get_aoa_size(deref_instr->type); + if (!array_size) + array_size = 1; + + if (nir_src_is_const(deref_instr->arr.index)) { + constant_index += array_size * nir_src_as_uint(deref_instr->arr.index); + } else { + LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index); + + indirect = LLVMBuildMul(ctx->ac.builder, indirect, + LLVMConstInt(ctx->ac.i32, array_size, false), ""); + + if (!index) + index = indirect; + else + index = LLVMBuildAdd(ctx->ac.builder, index, indirect, ""); + } + + deref_instr 
= nir_src_as_deref(deref_instr->parent); + } else if (deref_instr->deref_type == nir_deref_type_struct) { + unsigned sidx = deref_instr->strct.index; + deref_instr = nir_src_as_deref(deref_instr->parent); + constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx); + } else { + unreachable("Unsupported deref type"); + } + } + descriptor_set = deref_instr->var->data.descriptor_set; + + if (deref_instr->var->data.bindless) { + /* For now just assert on unhandled variable types */ + assert(deref_instr->var->data.mode == nir_var_uniform); + + base_index = deref_instr->var->data.driver_location; + bindless = true; + + index = index ? index : ctx->ac.i32_0; + index = get_bindless_index_from_uniform(ctx, base_index, constant_index, index); + } else + base_index = deref_instr->var->data.binding; + } + return (struct sampler_desc_address){ + .descriptor_set = descriptor_set, + .base_index = base_index, + .constant_index = constant_index, + .dynamic_index = index, + .image = image, + .bindless = bindless, + }; } /* Extract any possibly divergent index into a separate value that can be fed * into get_sampler_desc with the same arguments. 
*/ -static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, - nir_deref_instr *deref_instr, - const nir_instr *instr, - bool image) -{ - struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image); - return addr.dynamic_index; -} - -static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, - nir_deref_instr *deref_instr, - enum ac_descriptor_type desc_type, - const nir_instr *instr, - LLVMValueRef index, - bool image, bool write) -{ - struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image); - return ctx->abi->load_sampler_desc(ctx->abi, - addr.descriptor_set, - addr.base_index, - addr.constant_index, index, - desc_type, addr.image, write, addr.bindless); +static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr, + const nir_instr *instr, bool image) +{ + struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image); + return addr.dynamic_index; +} + +static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr, + enum ac_descriptor_type desc_type, const nir_instr *instr, + LLVMValueRef index, bool image, bool write) +{ + struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image); + return ctx->abi->load_sampler_desc(ctx->abi, addr.descriptor_set, addr.base_index, + addr.constant_index, index, desc_type, addr.image, write, + addr.bindless); } /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. @@ -4411,1235 +4100,1131 @@ * GFX8: * The ANISO_OVERRIDE sampler field enables this fix in TA. 
*/ -static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, - LLVMValueRef res, LLVMValueRef samp) +static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValueRef res, + LLVMValueRef samp) { - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef img7, samp0; + LLVMBuilderRef builder = ctx->ac.builder; + LLVMValueRef img7, samp0; - if (ctx->ac.chip_class >= GFX8) - return samp; + if (ctx->ac.chip_class >= GFX8) + return samp; - img7 = LLVMBuildExtractElement(builder, res, - LLVMConstInt(ctx->ac.i32, 7, 0), ""); - samp0 = LLVMBuildExtractElement(builder, samp, - LLVMConstInt(ctx->ac.i32, 0, 0), ""); - samp0 = LLVMBuildAnd(builder, samp0, img7, ""); - return LLVMBuildInsertElement(builder, samp, samp0, - LLVMConstInt(ctx->ac.i32, 0, 0), ""); -} - -static void tex_fetch_ptrs(struct ac_nir_context *ctx, - nir_tex_instr *instr, - struct waterfall_context *wctx, - LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, - LLVMValueRef *fmask_ptr) -{ - nir_deref_instr *texture_deref_instr = NULL; - nir_deref_instr *sampler_deref_instr = NULL; - int plane = -1; - - for (unsigned i = 0; i < instr->num_srcs; i++) { - switch (instr->src[i].src_type) { - case nir_tex_src_texture_deref: - texture_deref_instr = nir_src_as_deref(instr->src[i].src); - break; - case nir_tex_src_sampler_deref: - sampler_deref_instr = nir_src_as_deref(instr->src[i].src); - break; - case nir_tex_src_plane: - plane = nir_src_as_int(instr->src[i].src); - break; - default: - break; - } - } - - LLVMValueRef texture_dynamic_index = get_sampler_desc_index(ctx, texture_deref_instr, - &instr->instr, false); - if (!sampler_deref_instr) - sampler_deref_instr = texture_deref_instr; - - LLVMValueRef sampler_dynamic_index = get_sampler_desc_index(ctx, sampler_deref_instr, - &instr->instr, false); - if (instr->texture_non_uniform) - texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true); - - if (instr->sampler_non_uniform) - sampler_dynamic_index = 
enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true); - - enum ac_descriptor_type main_descriptor = instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE; - - if (plane >= 0) { - assert(instr->op != nir_texop_txf_ms && - instr->op != nir_texop_samples_identical); - assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF); - - main_descriptor = AC_DESC_PLANE_0 + plane; - } - - if (instr->op == nir_texop_fragment_mask_fetch) { - /* The fragment mask is fetched from the compressed - * multisampled surface. - */ - main_descriptor = AC_DESC_FMASK; - } - - *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr, - texture_dynamic_index, false, false); - - if (samp_ptr) { - *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr, - sampler_dynamic_index, false, false); - if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) - *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); - } - if (fmask_ptr && (instr->op == nir_texop_txf_ms || - instr->op == nir_texop_samples_identical)) - *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, - &instr->instr, texture_dynamic_index, false, false); -} - -static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, - LLVMValueRef coord) -{ - coord = ac_to_float(ctx, coord); - coord = ac_build_round(ctx, coord); - coord = ac_to_integer(ctx, coord); - return coord; + img7 = LLVMBuildExtractElement(builder, res, LLVMConstInt(ctx->ac.i32, 7, 0), ""); + samp0 = LLVMBuildExtractElement(builder, samp, LLVMConstInt(ctx->ac.i32, 0, 0), ""); + samp0 = LLVMBuildAnd(builder, samp0, img7, ""); + return LLVMBuildInsertElement(builder, samp, samp0, LLVMConstInt(ctx->ac.i32, 0, 0), ""); +} + +static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr, + struct waterfall_context *wctx, LLVMValueRef *res_ptr, + LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr) +{ + nir_deref_instr *texture_deref_instr = NULL; + nir_deref_instr 
*sampler_deref_instr = NULL; + int plane = -1; + + for (unsigned i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_texture_deref: + texture_deref_instr = nir_src_as_deref(instr->src[i].src); + break; + case nir_tex_src_sampler_deref: + sampler_deref_instr = nir_src_as_deref(instr->src[i].src); + break; + case nir_tex_src_plane: + plane = nir_src_as_int(instr->src[i].src); + break; + default: + break; + } + } + + LLVMValueRef texture_dynamic_index = + get_sampler_desc_index(ctx, texture_deref_instr, &instr->instr, false); + if (!sampler_deref_instr) + sampler_deref_instr = texture_deref_instr; + + LLVMValueRef sampler_dynamic_index = + get_sampler_desc_index(ctx, sampler_deref_instr, &instr->instr, false); + if (instr->texture_non_uniform) + texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true); + + if (instr->sampler_non_uniform) + sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true); + + enum ac_descriptor_type main_descriptor = + instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE; + + if (plane >= 0) { + assert(instr->op != nir_texop_txf_ms && instr->op != nir_texop_samples_identical); + assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF); + + main_descriptor = AC_DESC_PLANE_0 + plane; + } + + if (instr->op == nir_texop_fragment_mask_fetch) { + /* The fragment mask is fetched from the compressed + * multisampled surface. 
+ */ + main_descriptor = AC_DESC_FMASK; + } + + *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr, + texture_dynamic_index, false, false); + + if (samp_ptr) { + *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr, + sampler_dynamic_index, false, false); + if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) + *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); + } + if (fmask_ptr && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_samples_identical)) + *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, &instr->instr, + texture_dynamic_index, false, false); +} + +static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, LLVMValueRef coord) +{ + coord = ac_to_float(ctx, coord); + coord = ac_build_round(ctx, coord); + coord = ac_to_integer(ctx, coord); + return coord; } static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) { - LLVMValueRef result = NULL; - struct ac_image_args args = { 0 }; - LLVMValueRef fmask_ptr = NULL, sample_index = NULL; - LLVMValueRef ddx = NULL, ddy = NULL; - unsigned offset_src = 0; - struct waterfall_context wctx[2] = {{{0}}}; - - tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr); - - for (unsigned i = 0; i < instr->num_srcs; i++) { - switch (instr->src[i].src_type) { - case nir_tex_src_coord: { - LLVMValueRef coord = get_src(ctx, instr->src[i].src); - for (unsigned chan = 0; chan < instr->coord_components; ++chan) - args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); - break; - } - case nir_tex_src_projector: - break; - case nir_tex_src_comparator: - if (instr->is_shadow) { - args.compare = get_src(ctx, instr->src[i].src); - args.compare = ac_to_float(&ctx->ac, args.compare); - } - break; - case nir_tex_src_offset: - args.offset = get_src(ctx, instr->src[i].src); - offset_src = i; - break; - case nir_tex_src_bias: - args.bias = get_src(ctx, instr->src[i].src); - break; - case 
nir_tex_src_lod: { - if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0) - args.level_zero = true; - else - args.lod = get_src(ctx, instr->src[i].src); - break; - } - case nir_tex_src_ms_index: - sample_index = get_src(ctx, instr->src[i].src); - break; - case nir_tex_src_ms_mcs: - break; - case nir_tex_src_ddx: - ddx = get_src(ctx, instr->src[i].src); - break; - case nir_tex_src_ddy: - ddy = get_src(ctx, instr->src[i].src); - break; - case nir_tex_src_min_lod: - args.min_lod = get_src(ctx, instr->src[i].src); - break; - case nir_tex_src_texture_offset: - case nir_tex_src_sampler_offset: - case nir_tex_src_plane: - default: - break; - } - } - - if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - result = get_buffer_size(ctx, args.resource, true); - goto write_result; - } - - if (instr->op == nir_texop_texture_samples) { - LLVMValueRef res, samples, is_msaa; - LLVMValueRef default_sample; - - res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, ""); - samples = LLVMBuildExtractElement(ctx->ac.builder, res, - LLVMConstInt(ctx->ac.i32, 3, false), ""); - is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, - LLVMConstInt(ctx->ac.i32, 28, false), ""); - is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, - LLVMConstInt(ctx->ac.i32, 0xe, false), ""); - is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa, - LLVMConstInt(ctx->ac.i32, 0xe, false), ""); - - samples = LLVMBuildLShr(ctx->ac.builder, samples, - LLVMConstInt(ctx->ac.i32, 16, false), ""); - samples = LLVMBuildAnd(ctx->ac.builder, samples, - LLVMConstInt(ctx->ac.i32, 0xf, false), ""); - samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, - samples, ""); - - if (ctx->abi->robust_buffer_access) { - LLVMValueRef dword1, is_null_descriptor; - - /* Extract the second dword of the descriptor, if it's - * all zero, then it's a null descriptor. 
- */ - dword1 = LLVMBuildExtractElement(ctx->ac.builder, res, - LLVMConstInt(ctx->ac.i32, 1, false), ""); - is_null_descriptor = - LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1, - LLVMConstInt(ctx->ac.i32, 0, false), ""); - default_sample = - LLVMBuildSelect(ctx->ac.builder, is_null_descriptor, - ctx->ac.i32_0, ctx->ac.i32_1, ""); - } else { - default_sample = ctx->ac.i32_1; - } - - samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, - default_sample, ""); - result = samples; - goto write_result; - } - - if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { - LLVMValueRef offset[3], pack; - for (unsigned chan = 0; chan < 3; ++chan) - offset[chan] = ctx->ac.i32_0; - - unsigned num_components = ac_get_llvm_num_components(args.offset); - for (unsigned chan = 0; chan < num_components; chan++) { - offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan); - offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], - LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); - if (chan) - offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], - LLVMConstInt(ctx->ac.i32, chan * 8, false), ""); - } - pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); - pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); - args.offset = pack; - } - - /* Section 8.23.1 (Depth Texture Comparison Mode) of the - * OpenGL 4.5 spec says: - * - * "If the texture’s internal format indicates a fixed-point - * depth texture, then D_t and D_ref are clamped to the - * range [0, 1]; otherwise no clamping is performed." - * - * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT, - * so the depth comparison value isn't clamped for Z16 and - * Z24 anymore. Do it manually here for GFX8-9; GFX10 has - * an explicitly clamped 32-bit float format. 
- */ - if (args.compare && - ctx->ac.chip_class >= GFX8 && - ctx->ac.chip_class <= GFX9 && - ctx->abi->clamp_shadow_reference) { - LLVMValueRef upgraded, clamped; - - upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, - LLVMConstInt(ctx->ac.i32, 3, false), ""); - upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, - LLVMConstInt(ctx->ac.i32, 29, false), ""); - upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, ""); - clamped = ac_build_clamp(&ctx->ac, args.compare); - args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, - args.compare, ""); - } - - /* pack derivatives */ - if (ddx || ddy) { - int num_src_deriv_channels, num_dest_deriv_channels; - switch (instr->sampler_dim) { - case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - num_src_deriv_channels = 3; - num_dest_deriv_channels = 3; - break; - case GLSL_SAMPLER_DIM_2D: - default: - num_src_deriv_channels = 2; - num_dest_deriv_channels = 2; - break; - case GLSL_SAMPLER_DIM_1D: - num_src_deriv_channels = 1; - if (ctx->ac.chip_class == GFX9) { - num_dest_deriv_channels = 2; - } else { - num_dest_deriv_channels = 1; - } - break; - } - - for (unsigned i = 0; i < num_src_deriv_channels; i++) { - args.derivs[i] = ac_to_float(&ctx->ac, - ac_llvm_extract_elem(&ctx->ac, ddx, i)); - args.derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, - ac_llvm_extract_elem(&ctx->ac, ddy, i)); - } - for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) { - args.derivs[i] = ctx->ac.f32_0; - args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; - } - } - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) { - for (unsigned chan = 0; chan < instr->coord_components; chan++) - args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]); - if (instr->coord_components == 3) - args.coords[3] = LLVMGetUndef(ctx->ac.f32); - ac_prepare_cube_coords(&ctx->ac, - instr->op == nir_texop_txd, instr->is_array, - instr->op == nir_texop_lod, args.coords, 
args.derivs); - } - - /* Texture coordinates fixups */ - if (instr->coord_components > 1 && - instr->sampler_dim == GLSL_SAMPLER_DIM_1D && - instr->is_array && - instr->op != nir_texop_txf) { - args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]); - } - - if (instr->coord_components > 2 && - (instr->sampler_dim == GLSL_SAMPLER_DIM_2D || - instr->sampler_dim == GLSL_SAMPLER_DIM_MS || - instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || - instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) && - instr->is_array && - instr->op != nir_texop_txf && - instr->op != nir_texop_txf_ms && - instr->op != nir_texop_fragment_fetch && - instr->op != nir_texop_fragment_mask_fetch) { - args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]); - } - - if (ctx->ac.chip_class == GFX9 && - instr->sampler_dim == GLSL_SAMPLER_DIM_1D && - instr->op != nir_texop_lod) { - LLVMValueRef filler; - if (instr->op == nir_texop_txf) - filler = ctx->ac.i32_0; - else - filler = LLVMConstReal(ctx->ac.f32, 0.5); - - if (instr->is_array) - args.coords[2] = args.coords[1]; - args.coords[1] = filler; - } - - /* Pack sample index */ - if (sample_index && (instr->op == nir_texop_txf_ms || - instr->op == nir_texop_fragment_fetch)) - args.coords[instr->coord_components] = sample_index; - - if (instr->op == nir_texop_samples_identical) { - struct ac_image_args txf_args = { 0 }; - memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords)); - - txf_args.dmask = 0xf; - txf_args.resource = fmask_ptr; - txf_args.dim = instr->is_array ? 
ac_image_2darray : ac_image_2d; - result = build_tex_intrinsic(ctx, instr, &txf_args); - - result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); - result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0); - goto write_result; - } - - if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS || - instr->sampler_dim == GLSL_SAMPLER_DIM_MS) && - instr->op != nir_texop_txs && - instr->op != nir_texop_fragment_fetch && - instr->op != nir_texop_fragment_mask_fetch) { - unsigned sample_chan = instr->is_array ? 3 : 2; - args.coords[sample_chan] = adjust_sample_index_using_fmask( - &ctx->ac, args.coords[0], args.coords[1], - instr->is_array ? args.coords[2] : NULL, - args.coords[sample_chan], fmask_ptr); - } - - if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) { - int num_offsets = instr->src[offset_src].src.ssa->num_components; - num_offsets = MIN2(num_offsets, instr->coord_components); - for (unsigned i = 0; i < num_offsets; ++i) { - args.coords[i] = LLVMBuildAdd( - ctx->ac.builder, args.coords[i], - LLVMConstInt(ctx->ac.i32, nir_src_comp_as_uint(instr->src[offset_src].src, i), false), ""); - } - args.offset = NULL; - } - - /* DMASK was repurposed for GATHER4. 4 components are always - * returned and DMASK works like a swizzle - it selects - * the component to fetch. The only valid DMASK values are - * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns - * (red,red,red,red) etc.) The ISA document doesn't mention - * this. 
- */ - args.dmask = 0xf; - if (instr->op == nir_texop_tg4) { - if (instr->is_shadow) - args.dmask = 1; - else - args.dmask = 1 << instr->component; - } - - if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) { - args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array); - args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT; - } - - /* Adjust the number of coordinates because we only need (x,y) for 2D - * multisampled images and (x,y,layer) for 2D multisampled layered - * images or for multisampled input attachments. - */ - if (instr->op == nir_texop_fragment_mask_fetch) { - if (args.dim == ac_image_2dmsaa) { - args.dim = ac_image_2d; - } else { - assert(args.dim == ac_image_2darraymsaa); - args.dim = ac_image_2darray; - } - } - - assert(instr->dest.is_ssa); - args.d16 = instr->dest.ssa.bit_size == 16; - - result = build_tex_intrinsic(ctx, instr, &args); - - if (instr->op == nir_texop_query_levels) - result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), ""); - else if (instr->is_shadow && instr->is_new_style_shadow && - instr->op != nir_texop_txs && instr->op != nir_texop_lod && - instr->op != nir_texop_tg4) - result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); - else if (instr->op == nir_texop_txs && - instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && - instr->is_array) { - LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); - LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); - LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, ""); - z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); - result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, ""); - } else if (ctx->ac.chip_class == GFX9 && - instr->op == nir_texop_txs && - instr->sampler_dim == GLSL_SAMPLER_DIM_1D && - instr->is_array) { - LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); - LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, ""); - result = 
LLVMBuildInsertElement(ctx->ac.builder, result, layers, - ctx->ac.i32_1, ""); - } else if (instr->dest.ssa.num_components != 4) - result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components); + LLVMValueRef result = NULL; + struct ac_image_args args = {0}; + LLVMValueRef fmask_ptr = NULL, sample_index = NULL; + LLVMValueRef ddx = NULL, ddy = NULL; + unsigned offset_src = 0; + struct waterfall_context wctx[2] = {{{0}}}; + + tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_coord: { + LLVMValueRef coord = get_src(ctx, instr->src[i].src); + for (unsigned chan = 0; chan < instr->coord_components; ++chan) + args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); + break; + } + case nir_tex_src_projector: + break; + case nir_tex_src_comparator: + if (instr->is_shadow) { + args.compare = get_src(ctx, instr->src[i].src); + args.compare = ac_to_float(&ctx->ac, args.compare); + } + break; + case nir_tex_src_offset: + args.offset = get_src(ctx, instr->src[i].src); + offset_src = i; + break; + case nir_tex_src_bias: + args.bias = get_src(ctx, instr->src[i].src); + break; + case nir_tex_src_lod: { + if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0) + args.level_zero = true; + else + args.lod = get_src(ctx, instr->src[i].src); + break; + } + case nir_tex_src_ms_index: + sample_index = get_src(ctx, instr->src[i].src); + break; + case nir_tex_src_ms_mcs: + break; + case nir_tex_src_ddx: + ddx = get_src(ctx, instr->src[i].src); + break; + case nir_tex_src_ddy: + ddy = get_src(ctx, instr->src[i].src); + break; + case nir_tex_src_min_lod: + args.min_lod = get_src(ctx, instr->src[i].src); + break; + case nir_tex_src_texture_offset: + case nir_tex_src_sampler_offset: + case nir_tex_src_plane: + default: + break; + } + } + + if (instr->op == nir_texop_txs && instr->sampler_dim == 
GLSL_SAMPLER_DIM_BUF) { + result = get_buffer_size(ctx, args.resource, true); + goto write_result; + } + + if (instr->op == nir_texop_texture_samples) { + LLVMValueRef res, samples, is_msaa; + LLVMValueRef default_sample; + + res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, ""); + samples = + LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 3, false), ""); + is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 28, false), ""); + is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, LLVMConstInt(ctx->ac.i32, 0xe, false), ""); + is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa, + LLVMConstInt(ctx->ac.i32, 0xe, false), ""); + + samples = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 16, false), ""); + samples = LLVMBuildAnd(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 0xf, false), ""); + samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, samples, ""); + + if (ctx->abi->robust_buffer_access) { + LLVMValueRef dword1, is_null_descriptor; + + /* Extract the second dword of the descriptor, if it's + * all zero, then it's a null descriptor. 
+ */ + dword1 = + LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 1, false), ""); + is_null_descriptor = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1, + LLVMConstInt(ctx->ac.i32, 0, false), ""); + default_sample = + LLVMBuildSelect(ctx->ac.builder, is_null_descriptor, ctx->ac.i32_0, ctx->ac.i32_1, ""); + } else { + default_sample = ctx->ac.i32_1; + } + + samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, default_sample, ""); + result = samples; + goto write_result; + } + + if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { + LLVMValueRef offset[3], pack; + for (unsigned chan = 0; chan < 3; ++chan) + offset[chan] = ctx->ac.i32_0; + + unsigned num_components = ac_get_llvm_num_components(args.offset); + for (unsigned chan = 0; chan < num_components; chan++) { + offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan); + offset[chan] = + LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); + if (chan) + offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], + LLVMConstInt(ctx->ac.i32, chan * 8, false), ""); + } + pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); + pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); + args.offset = pack; + } + + /* Section 8.23.1 (Depth Texture Comparison Mode) of the + * OpenGL 4.5 spec says: + * + * "If the texture’s internal format indicates a fixed-point + * depth texture, then D_t and D_ref are clamped to the + * range [0, 1]; otherwise no clamping is performed." + * + * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT, + * so the depth comparison value isn't clamped for Z16 and + * Z24 anymore. Do it manually here for GFX8-9; GFX10 has + * an explicitly clamped 32-bit float format. 
+ */ + if (args.compare && ctx->ac.chip_class >= GFX8 && ctx->ac.chip_class <= GFX9 && + ctx->abi->clamp_shadow_reference) { + LLVMValueRef upgraded, clamped; + + upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, + LLVMConstInt(ctx->ac.i32, 3, false), ""); + upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, LLVMConstInt(ctx->ac.i32, 29, false), ""); + upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, ""); + clamped = ac_build_clamp(&ctx->ac, args.compare); + args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, args.compare, ""); + } + + /* pack derivatives */ + if (ddx || ddy) { + int num_src_deriv_channels, num_dest_deriv_channels; + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + num_src_deriv_channels = 3; + num_dest_deriv_channels = 3; + break; + case GLSL_SAMPLER_DIM_2D: + default: + num_src_deriv_channels = 2; + num_dest_deriv_channels = 2; + break; + case GLSL_SAMPLER_DIM_1D: + num_src_deriv_channels = 1; + if (ctx->ac.chip_class == GFX9) { + num_dest_deriv_channels = 2; + } else { + num_dest_deriv_channels = 1; + } + break; + } + + for (unsigned i = 0; i < num_src_deriv_channels; i++) { + args.derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i)); + args.derivs[num_dest_deriv_channels + i] = + ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i)); + } + for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) { + args.derivs[i] = ctx->ac.f32_0; + args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; + } + } + + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) { + for (unsigned chan = 0; chan < instr->coord_components; chan++) + args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]); + if (instr->coord_components == 3) + args.coords[3] = LLVMGetUndef(ctx->ac.f32); + ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array, + instr->op == nir_texop_lod, args.coords, 
args.derivs); + } + + /* Texture coordinates fixups */ + if (instr->coord_components > 1 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D && + instr->is_array && instr->op != nir_texop_txf) { + args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]); + } + + if (instr->coord_components > 2 && + (instr->sampler_dim == GLSL_SAMPLER_DIM_2D || instr->sampler_dim == GLSL_SAMPLER_DIM_MS || + instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || + instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) && + instr->is_array && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms && + instr->op != nir_texop_fragment_fetch && instr->op != nir_texop_fragment_mask_fetch) { + args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]); + } + + if (ctx->ac.chip_class == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D && + instr->op != nir_texop_lod) { + LLVMValueRef filler; + if (instr->op == nir_texop_txf) + filler = ctx->ac.i32_0; + else + filler = LLVMConstReal(ctx->ac.f32, 0.5); + + if (instr->is_array) + args.coords[2] = args.coords[1]; + args.coords[1] = filler; + } + + /* Pack sample index */ + if (sample_index && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_fragment_fetch)) + args.coords[instr->coord_components] = sample_index; + + if (instr->op == nir_texop_samples_identical) { + struct ac_image_args txf_args = {0}; + memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords)); + + txf_args.dmask = 0xf; + txf_args.resource = fmask_ptr; + txf_args.dim = instr->is_array ? 
ac_image_2darray : ac_image_2d; + result = build_tex_intrinsic(ctx, instr, &txf_args); + + result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); + result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0); + goto write_result; + } + + if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS || + instr->sampler_dim == GLSL_SAMPLER_DIM_MS) && + instr->op != nir_texop_txs && instr->op != nir_texop_fragment_fetch && + instr->op != nir_texop_fragment_mask_fetch) { + unsigned sample_chan = instr->is_array ? 3 : 2; + args.coords[sample_chan] = adjust_sample_index_using_fmask( + &ctx->ac, args.coords[0], args.coords[1], instr->is_array ? args.coords[2] : NULL, + args.coords[sample_chan], fmask_ptr); + } + + if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) { + int num_offsets = instr->src[offset_src].src.ssa->num_components; + num_offsets = MIN2(num_offsets, instr->coord_components); + for (unsigned i = 0; i < num_offsets; ++i) { + args.coords[i] = LLVMBuildAdd( + ctx->ac.builder, args.coords[i], + LLVMConstInt(ctx->ac.i32, nir_src_comp_as_uint(instr->src[offset_src].src, i), false), + ""); + } + args.offset = NULL; + } + + /* DMASK was repurposed for GATHER4. 4 components are always + * returned and DMASK works like a swizzle - it selects + * the component to fetch. The only valid DMASK values are + * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns + * (red,red,red,red) etc.) The ISA document doesn't mention + * this. 
+ */ + args.dmask = 0xf; + if (instr->op == nir_texop_tg4) { + if (instr->is_shadow) + args.dmask = 1; + else + args.dmask = 1 << instr->component; + } + + if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) { + args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array); + args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT; + } + + /* Adjust the number of coordinates because we only need (x,y) for 2D + * multisampled images and (x,y,layer) for 2D multisampled layered + * images or for multisampled input attachments. + */ + if (instr->op == nir_texop_fragment_mask_fetch) { + if (args.dim == ac_image_2dmsaa) { + args.dim = ac_image_2d; + } else { + assert(args.dim == ac_image_2darraymsaa); + args.dim = ac_image_2darray; + } + } + + assert(instr->dest.is_ssa); + args.d16 = instr->dest.ssa.bit_size == 16; + + result = build_tex_intrinsic(ctx, instr, &args); + + if (instr->op == nir_texop_query_levels) + result = + LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), ""); + else if (instr->is_shadow && instr->is_new_style_shadow && instr->op != nir_texop_txs && + instr->op != nir_texop_lod && instr->op != nir_texop_tg4) + result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); + else if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && + instr->is_array) { + LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); + LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); + LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, ""); + z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); + result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, ""); + } else if (ctx->ac.chip_class == GFX9 && instr->op == nir_texop_txs && + instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array) { + LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); + LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, ""); + result = 
LLVMBuildInsertElement(ctx->ac.builder, result, layers, ctx->ac.i32_1, ""); + } else if (instr->dest.ssa.num_components != 4) + result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components); write_result: - if (result) { - assert(instr->dest.is_ssa); - result = ac_to_integer(&ctx->ac, result); - - for (int i = ARRAY_SIZE(wctx); --i >= 0;) { - result = exit_waterfall(ctx, wctx + i, result); - } + if (result) { + assert(instr->dest.is_ssa); + result = ac_to_integer(&ctx->ac, result); + + for (int i = ARRAY_SIZE(wctx); --i >= 0;) { + result = exit_waterfall(ctx, wctx + i, result); + } - ctx->ssa_defs[instr->dest.ssa.index] = result; - } + ctx->ssa_defs[instr->dest.ssa.index] = result; + } } static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr) { - LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa); - LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, ""); + LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa); + LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, ""); - ctx->ssa_defs[instr->dest.ssa.index] = result; - _mesa_hash_table_insert(ctx->phis, instr, result); + ctx->ssa_defs[instr->dest.ssa.index] = result; + _mesa_hash_table_insert(ctx->phis, instr, result); } -static void visit_post_phi(struct ac_nir_context *ctx, - nir_phi_instr *instr, - LLVMValueRef llvm_phi) +static void visit_post_phi(struct ac_nir_context *ctx, nir_phi_instr *instr, LLVMValueRef llvm_phi) { - nir_foreach_phi_src(src, instr) { - LLVMBasicBlockRef block = get_block(ctx, src->pred); - LLVMValueRef llvm_src = get_src(ctx, src->src); + nir_foreach_phi_src (src, instr) { + LLVMBasicBlockRef block = get_block(ctx, src->pred); + LLVMValueRef llvm_src = get_src(ctx, src->src); - LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1); - } + LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1); + } } static void phi_post_pass(struct ac_nir_context *ctx) { - hash_table_foreach(ctx->phis, entry) { - visit_post_phi(ctx, (nir_phi_instr*)entry->key, - 
(LLVMValueRef)entry->data); - } -} - - -static bool is_def_used_in_an_export(const nir_ssa_def* def) { - nir_foreach_use(use_src, def) { - if (use_src->parent_instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr); - if (instr->intrinsic == nir_intrinsic_store_deref) - return true; - } else if (use_src->parent_instr->type == nir_instr_type_alu) { - nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr); - if (instr->op == nir_op_vec4 && - is_def_used_in_an_export(&instr->dest.dest.ssa)) { - return true; - } - } - } - return false; -} - -static void visit_ssa_undef(struct ac_nir_context *ctx, - const nir_ssa_undef_instr *instr) -{ - unsigned num_components = instr->def.num_components; - LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); - - if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) { - LLVMValueRef undef; - - if (num_components == 1) - undef = LLVMGetUndef(type); - else { - undef = LLVMGetUndef(LLVMVectorType(type, num_components)); - } - ctx->ssa_defs[instr->def.index] = undef; - } else { - LLVMValueRef zero = LLVMConstInt(type, 0, false); - if (num_components > 1) { - zero = ac_build_gather_values_extended( - &ctx->ac, &zero, 4, 0, false, false); - } - ctx->ssa_defs[instr->def.index] = zero; - } -} - -static void visit_jump(struct ac_llvm_context *ctx, - const nir_jump_instr *instr) -{ - switch (instr->type) { - case nir_jump_break: - ac_build_break(ctx); - break; - case nir_jump_continue: - ac_build_continue(ctx); - break; - default: - fprintf(stderr, "Unknown NIR jump instr: "); - nir_print_instr(&instr->instr, stderr); - fprintf(stderr, "\n"); - abort(); - } -} - -static LLVMTypeRef -glsl_base_to_llvm_type(struct ac_llvm_context *ac, - enum glsl_base_type type) -{ - switch (type) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_SUBROUTINE: - return ac->i32; - case GLSL_TYPE_INT8: - case 
GLSL_TYPE_UINT8: - return ac->i8; - case GLSL_TYPE_INT16: - case GLSL_TYPE_UINT16: - return ac->i16; - case GLSL_TYPE_FLOAT: - return ac->f32; - case GLSL_TYPE_FLOAT16: - return ac->f16; - case GLSL_TYPE_INT64: - case GLSL_TYPE_UINT64: - return ac->i64; - case GLSL_TYPE_DOUBLE: - return ac->f64; - default: - unreachable("unknown GLSL type"); - } -} - -static LLVMTypeRef -glsl_to_llvm_type(struct ac_llvm_context *ac, - const struct glsl_type *type) -{ - if (glsl_type_is_scalar(type)) { - return glsl_base_to_llvm_type(ac, glsl_get_base_type(type)); - } - - if (glsl_type_is_vector(type)) { - return LLVMVectorType( - glsl_base_to_llvm_type(ac, glsl_get_base_type(type)), - glsl_get_vector_elements(type)); - } - - if (glsl_type_is_matrix(type)) { - return LLVMArrayType( - glsl_to_llvm_type(ac, glsl_get_column_type(type)), - glsl_get_matrix_columns(type)); - } - - if (glsl_type_is_array(type)) { - return LLVMArrayType( - glsl_to_llvm_type(ac, glsl_get_array_element(type)), - glsl_get_length(type)); - } - - assert(glsl_type_is_struct_or_ifc(type)); - - LLVMTypeRef member_types[glsl_get_length(type)]; - - for (unsigned i = 0; i < glsl_get_length(type); i++) { - member_types[i] = - glsl_to_llvm_type(ac, - glsl_get_struct_field(type, i)); - } - - return LLVMStructTypeInContext(ac->context, member_types, - glsl_get_length(type), false); -} - -static void visit_deref(struct ac_nir_context *ctx, - nir_deref_instr *instr) -{ - if (instr->mode != nir_var_mem_shared && - instr->mode != nir_var_mem_global) - return; - - LLVMValueRef result = NULL; - switch(instr->deref_type) { - case nir_deref_type_var: { - struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var); - result = entry->data; - break; - } - case nir_deref_type_struct: - if (instr->mode == nir_var_mem_global) { - nir_deref_instr *parent = nir_deref_instr_parent(instr); - uint64_t offset = glsl_get_struct_field_offset(parent->type, - instr->strct.index); - result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, 
instr->parent), - LLVMConstInt(ctx->ac.i32, offset, 0)); - } else { - result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), - LLVMConstInt(ctx->ac.i32, instr->strct.index, 0)); - } - break; - case nir_deref_type_array: - if (instr->mode == nir_var_mem_global) { - nir_deref_instr *parent = nir_deref_instr_parent(instr); - unsigned stride = glsl_get_explicit_stride(parent->type); - - if ((glsl_type_is_matrix(parent->type) && - glsl_matrix_type_is_row_major(parent->type)) || - (glsl_type_is_vector(parent->type) && stride == 0)) - stride = type_scalar_size_bytes(parent->type); - - assert(stride > 0); - LLVMValueRef index = get_src(ctx, instr->arr.index); - if (LLVMTypeOf(index) != ctx->ac.i64) - index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, ""); - - LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), ""); - - result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset); - } else { - result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), - get_src(ctx, instr->arr.index)); - } - break; - case nir_deref_type_ptr_as_array: - if (instr->mode == nir_var_mem_global) { - unsigned stride = nir_deref_instr_ptr_as_array_stride(instr); - - LLVMValueRef index = get_src(ctx, instr->arr.index); - if (LLVMTypeOf(index) != ctx->ac.i64) - index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, ""); - - LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), ""); - - result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset); - } else { - result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), - get_src(ctx, instr->arr.index)); - } - break; - case nir_deref_type_cast: { - result = get_src(ctx, instr->parent); - - /* We can't use the structs from LLVM because the shader - * specifies its own offsets. 
*/ - LLVMTypeRef pointee_type = ctx->ac.i8; - if (instr->mode == nir_var_mem_shared) - pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type); - - unsigned address_space; - - switch(instr->mode) { - case nir_var_mem_shared: - address_space = AC_ADDR_SPACE_LDS; - break; - case nir_var_mem_global: - address_space = AC_ADDR_SPACE_GLOBAL; - break; - default: - unreachable("Unhandled address space"); - } - - LLVMTypeRef type = LLVMPointerType(pointee_type, address_space); - - if (LLVMTypeOf(result) != type) { - if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) { - result = LLVMBuildBitCast(ctx->ac.builder, result, - type, ""); - } else { - result = LLVMBuildIntToPtr(ctx->ac.builder, result, - type, ""); - } - } - break; - } - default: - unreachable("Unhandled deref_instr deref type"); - } + hash_table_foreach(ctx->phis, entry) + { + visit_post_phi(ctx, (nir_phi_instr *)entry->key, (LLVMValueRef)entry->data); + } +} + +static bool is_def_used_in_an_export(const nir_ssa_def *def) +{ + nir_foreach_use (use_src, def) { + if (use_src->parent_instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr); + if (instr->intrinsic == nir_intrinsic_store_deref) + return true; + } else if (use_src->parent_instr->type == nir_instr_type_alu) { + nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr); + if (instr->op == nir_op_vec4 && is_def_used_in_an_export(&instr->dest.dest.ssa)) { + return true; + } + } + } + return false; +} + +static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_ssa_undef_instr *instr) +{ + unsigned num_components = instr->def.num_components; + LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); + + if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) { + LLVMValueRef undef; + + if (num_components == 1) + undef = LLVMGetUndef(type); + else { + undef = LLVMGetUndef(LLVMVectorType(type, num_components)); + } + 
ctx->ssa_defs[instr->def.index] = undef; + } else { + LLVMValueRef zero = LLVMConstInt(type, 0, false); + if (num_components > 1) { + zero = ac_build_gather_values_extended(&ctx->ac, &zero, 4, 0, false, false); + } + ctx->ssa_defs[instr->def.index] = zero; + } +} + +static void visit_jump(struct ac_llvm_context *ctx, const nir_jump_instr *instr) +{ + switch (instr->type) { + case nir_jump_break: + ac_build_break(ctx); + break; + case nir_jump_continue: + ac_build_continue(ctx); + break; + default: + fprintf(stderr, "Unknown NIR jump instr: "); + nir_print_instr(&instr->instr, stderr); + fprintf(stderr, "\n"); + abort(); + } +} + +static LLVMTypeRef glsl_base_to_llvm_type(struct ac_llvm_context *ac, enum glsl_base_type type) +{ + switch (type) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_SUBROUTINE: + return ac->i32; + case GLSL_TYPE_INT8: + case GLSL_TYPE_UINT8: + return ac->i8; + case GLSL_TYPE_INT16: + case GLSL_TYPE_UINT16: + return ac->i16; + case GLSL_TYPE_FLOAT: + return ac->f32; + case GLSL_TYPE_FLOAT16: + return ac->f16; + case GLSL_TYPE_INT64: + case GLSL_TYPE_UINT64: + return ac->i64; + case GLSL_TYPE_DOUBLE: + return ac->f64; + default: + unreachable("unknown GLSL type"); + } +} + +static LLVMTypeRef glsl_to_llvm_type(struct ac_llvm_context *ac, const struct glsl_type *type) +{ + if (glsl_type_is_scalar(type)) { + return glsl_base_to_llvm_type(ac, glsl_get_base_type(type)); + } + + if (glsl_type_is_vector(type)) { + return LLVMVectorType(glsl_base_to_llvm_type(ac, glsl_get_base_type(type)), + glsl_get_vector_elements(type)); + } + + if (glsl_type_is_matrix(type)) { + return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_column_type(type)), + glsl_get_matrix_columns(type)); + } + + if (glsl_type_is_array(type)) { + return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_array_element(type)), + glsl_get_length(type)); + } + + assert(glsl_type_is_struct_or_ifc(type)); + + LLVMTypeRef member_types[glsl_get_length(type)]; + 
+ for (unsigned i = 0; i < glsl_get_length(type); i++) { + member_types[i] = glsl_to_llvm_type(ac, glsl_get_struct_field(type, i)); + } + + return LLVMStructTypeInContext(ac->context, member_types, glsl_get_length(type), false); +} + +static void visit_deref(struct ac_nir_context *ctx, nir_deref_instr *instr) +{ + if (instr->mode != nir_var_mem_shared && instr->mode != nir_var_mem_global) + return; + + LLVMValueRef result = NULL; + switch (instr->deref_type) { + case nir_deref_type_var: { + struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var); + result = entry->data; + break; + } + case nir_deref_type_struct: + if (instr->mode == nir_var_mem_global) { + nir_deref_instr *parent = nir_deref_instr_parent(instr); + uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index); + result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), + LLVMConstInt(ctx->ac.i32, offset, 0)); + } else { + result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), + LLVMConstInt(ctx->ac.i32, instr->strct.index, 0)); + } + break; + case nir_deref_type_array: + if (instr->mode == nir_var_mem_global) { + nir_deref_instr *parent = nir_deref_instr_parent(instr); + unsigned stride = glsl_get_explicit_stride(parent->type); + + if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) || + (glsl_type_is_vector(parent->type) && stride == 0)) + stride = type_scalar_size_bytes(parent->type); + + assert(stride > 0); + LLVMValueRef index = get_src(ctx, instr->arr.index); + if (LLVMTypeOf(index) != ctx->ac.i64) + index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, ""); + + LLVMValueRef offset = + LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), ""); + + result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset); + } else { + result = + ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index)); + } + break; + case nir_deref_type_ptr_as_array: + if 
(instr->mode == nir_var_mem_global) { + unsigned stride = nir_deref_instr_ptr_as_array_stride(instr); + + LLVMValueRef index = get_src(ctx, instr->arr.index); + if (LLVMTypeOf(index) != ctx->ac.i64) + index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, ""); + + LLVMValueRef offset = + LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), ""); + + result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset); + } else { + result = + ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index)); + } + break; + case nir_deref_type_cast: { + result = get_src(ctx, instr->parent); + + /* We can't use the structs from LLVM because the shader + * specifies its own offsets. */ + LLVMTypeRef pointee_type = ctx->ac.i8; + if (instr->mode == nir_var_mem_shared) + pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type); + + unsigned address_space; + + switch (instr->mode) { + case nir_var_mem_shared: + address_space = AC_ADDR_SPACE_LDS; + break; + case nir_var_mem_global: + address_space = AC_ADDR_SPACE_GLOBAL; + break; + default: + unreachable("Unhandled address space"); + } + + LLVMTypeRef type = LLVMPointerType(pointee_type, address_space); + + if (LLVMTypeOf(result) != type) { + if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) { + result = LLVMBuildBitCast(ctx->ac.builder, result, type, ""); + } else { + result = LLVMBuildIntToPtr(ctx->ac.builder, result, type, ""); + } + } + break; + } + default: + unreachable("Unhandled deref_instr deref type"); + } - ctx->ssa_defs[instr->dest.ssa.index] = result; + ctx->ssa_defs[instr->dest.ssa.index] = result; } -static void visit_cf_list(struct ac_nir_context *ctx, - struct exec_list *list); +static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list); static void visit_block(struct ac_nir_context *ctx, nir_block *block) { - nir_foreach_instr(instr, block) - { - switch (instr->type) { - case nir_instr_type_alu: - visit_alu(ctx, 
nir_instr_as_alu(instr)); - break; - case nir_instr_type_load_const: - visit_load_const(ctx, nir_instr_as_load_const(instr)); - break; - case nir_instr_type_intrinsic: - visit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); - break; - case nir_instr_type_tex: - visit_tex(ctx, nir_instr_as_tex(instr)); - break; - case nir_instr_type_phi: - visit_phi(ctx, nir_instr_as_phi(instr)); - break; - case nir_instr_type_ssa_undef: - visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); - break; - case nir_instr_type_jump: - visit_jump(&ctx->ac, nir_instr_as_jump(instr)); - break; - case nir_instr_type_deref: - visit_deref(ctx, nir_instr_as_deref(instr)); - break; - default: - fprintf(stderr, "Unknown NIR instr type: "); - nir_print_instr(instr, stderr); - fprintf(stderr, "\n"); - abort(); - } - } + nir_foreach_instr (instr, block) { + switch (instr->type) { + case nir_instr_type_alu: + visit_alu(ctx, nir_instr_as_alu(instr)); + break; + case nir_instr_type_load_const: + visit_load_const(ctx, nir_instr_as_load_const(instr)); + break; + case nir_instr_type_intrinsic: + visit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_tex: + visit_tex(ctx, nir_instr_as_tex(instr)); + break; + case nir_instr_type_phi: + visit_phi(ctx, nir_instr_as_phi(instr)); + break; + case nir_instr_type_ssa_undef: + visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); + break; + case nir_instr_type_jump: + visit_jump(&ctx->ac, nir_instr_as_jump(instr)); + break; + case nir_instr_type_deref: + visit_deref(ctx, nir_instr_as_deref(instr)); + break; + default: + fprintf(stderr, "Unknown NIR instr type: "); + nir_print_instr(instr, stderr); + fprintf(stderr, "\n"); + abort(); + } + } - _mesa_hash_table_insert(ctx->defs, block, - LLVMGetInsertBlock(ctx->ac.builder)); + _mesa_hash_table_insert(ctx->defs, block, LLVMGetInsertBlock(ctx->ac.builder)); } static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt) { - LLVMValueRef value = get_src(ctx, if_stmt->condition); + 
LLVMValueRef value = get_src(ctx, if_stmt->condition); - nir_block *then_block = - (nir_block *) exec_list_get_head(&if_stmt->then_list); + nir_block *then_block = (nir_block *)exec_list_get_head(&if_stmt->then_list); - ac_build_uif(&ctx->ac, value, then_block->index); + ac_build_uif(&ctx->ac, value, then_block->index); - visit_cf_list(ctx, &if_stmt->then_list); + visit_cf_list(ctx, &if_stmt->then_list); - if (!exec_list_is_empty(&if_stmt->else_list)) { - nir_block *else_block = - (nir_block *) exec_list_get_head(&if_stmt->else_list); + if (!exec_list_is_empty(&if_stmt->else_list)) { + nir_block *else_block = (nir_block *)exec_list_get_head(&if_stmt->else_list); - ac_build_else(&ctx->ac, else_block->index); - visit_cf_list(ctx, &if_stmt->else_list); - } + ac_build_else(&ctx->ac, else_block->index); + visit_cf_list(ctx, &if_stmt->else_list); + } - ac_build_endif(&ctx->ac, then_block->index); + ac_build_endif(&ctx->ac, then_block->index); } static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop) { - nir_block *first_loop_block = - (nir_block *) exec_list_get_head(&loop->body); + nir_block *first_loop_block = (nir_block *)exec_list_get_head(&loop->body); - ac_build_bgnloop(&ctx->ac, first_loop_block->index); + ac_build_bgnloop(&ctx->ac, first_loop_block->index); - visit_cf_list(ctx, &loop->body); + visit_cf_list(ctx, &loop->body); - ac_build_endloop(&ctx->ac, first_loop_block->index); + ac_build_endloop(&ctx->ac, first_loop_block->index); } -static void visit_cf_list(struct ac_nir_context *ctx, - struct exec_list *list) -{ - foreach_list_typed(nir_cf_node, node, node, list) - { - switch (node->type) { - case nir_cf_node_block: - visit_block(ctx, nir_cf_node_as_block(node)); - break; - - case nir_cf_node_if: - visit_if(ctx, nir_cf_node_as_if(node)); - break; - - case nir_cf_node_loop: - visit_loop(ctx, nir_cf_node_as_loop(node)); - break; - - default: - assert(0); - } - } -} - -void -ac_handle_shader_output_decl(struct ac_llvm_context *ctx, - struct 
ac_shader_abi *abi, - struct nir_shader *nir, - struct nir_variable *variable, - gl_shader_stage stage) -{ - unsigned output_loc = variable->data.driver_location / 4; - unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); - - /* tess ctrl has it's own load/store paths for outputs */ - if (stage == MESA_SHADER_TESS_CTRL) - return; - - if (stage == MESA_SHADER_VERTEX || - stage == MESA_SHADER_TESS_EVAL || - stage == MESA_SHADER_GEOMETRY) { - int idx = variable->data.location + variable->data.index; - if (idx == VARYING_SLOT_CLIP_DIST0) { - int length = nir->info.clip_distance_array_size + - nir->info.cull_distance_array_size; - - if (length > 4) - attrib_count = 2; - else - attrib_count = 1; - } - } - - bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type)); - LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32; - for (unsigned i = 0; i < attrib_count; ++i) { - for (unsigned chan = 0; chan < 4; chan++) { - abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] = - ac_build_alloca_undef(ctx, type, ""); - } - } -} - -static void -setup_locals(struct ac_nir_context *ctx, - struct nir_function *func) -{ - int i, j; - ctx->num_locals = 0; - nir_foreach_function_temp_variable(variable, func->impl) { - unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); - variable->data.driver_location = ctx->num_locals * 4; - variable->data.location_frac = 0; - ctx->num_locals += attrib_count; - } - ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef)); - if (!ctx->locals) - return; - - for (i = 0; i < ctx->num_locals; i++) { - for (j = 0; j < 4; j++) { - ctx->locals[i * 4 + j] = - ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp"); - } - } -} - -static void -setup_scratch(struct ac_nir_context *ctx, - struct nir_shader *shader) -{ - if (shader->scratch_size == 0) - return; - - ctx->scratch = ac_build_alloca_undef(&ctx->ac, - LLVMArrayType(ctx->ac.i8, shader->scratch_size), - "scratch"); -} - -static void 
-setup_constant_data(struct ac_nir_context *ctx, - struct nir_shader *shader) -{ - if (!shader->constant_data) - return; - - LLVMValueRef data = - LLVMConstStringInContext(ctx->ac.context, - shader->constant_data, - shader->constant_data_size, - true); - LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, shader->constant_data_size); - - /* We want to put the constant data in the CONST address space so that - * we can use scalar loads. However, LLVM versions before 10 put these - * variables in the same section as the code, which is unacceptable - * for RadeonSI as it needs to relocate all the data sections after - * the code sections. See https://reviews.llvm.org/D65813. - */ - unsigned address_space = - LLVM_VERSION_MAJOR < 10 ? AC_ADDR_SPACE_GLOBAL : AC_ADDR_SPACE_CONST; - - LLVMValueRef global = - LLVMAddGlobalInAddressSpace(ctx->ac.module, type, - "const_data", - address_space); - - LLVMSetInitializer(global, data); - LLVMSetGlobalConstant(global, true); - LLVMSetVisibility(global, LLVMHiddenVisibility); - ctx->constant_data = global; -} - -static void -setup_shared(struct ac_nir_context *ctx, - struct nir_shader *nir) -{ - if (ctx->ac.lds) - return; - - LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, - nir->info.cs.shared_size); - - LLVMValueRef lds = - LLVMAddGlobalInAddressSpace(ctx->ac.module, type, - "compute_lds", - AC_ADDR_SPACE_LDS); - LLVMSetAlignment(lds, 64 * 1024); - - ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, lds, - LLVMPointerType(ctx->ac.i8, - AC_ADDR_SPACE_LDS), ""); +static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, node, node, list) + { + switch (node->type) { + case nir_cf_node_block: + visit_block(ctx, nir_cf_node_as_block(node)); + break; + + case nir_cf_node_if: + visit_if(ctx, nir_cf_node_as_if(node)); + break; + + case nir_cf_node_loop: + visit_loop(ctx, nir_cf_node_as_loop(node)); + break; + + default: + assert(0); + } + } +} + +void ac_handle_shader_output_decl(struct 
ac_llvm_context *ctx, struct ac_shader_abi *abi, + struct nir_shader *nir, struct nir_variable *variable, + gl_shader_stage stage) +{ + unsigned output_loc = variable->data.driver_location / 4; + unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); + + /* tess ctrl has it's own load/store paths for outputs */ + if (stage == MESA_SHADER_TESS_CTRL) + return; + + if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || + stage == MESA_SHADER_GEOMETRY) { + int idx = variable->data.location + variable->data.index; + if (idx == VARYING_SLOT_CLIP_DIST0) { + int length = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size; + + if (length > 4) + attrib_count = 2; + else + attrib_count = 1; + } + } + + bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type)); + LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32; + for (unsigned i = 0; i < attrib_count; ++i) { + for (unsigned chan = 0; chan < 4; chan++) { + abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] = + ac_build_alloca_undef(ctx, type, ""); + } + } +} + +static void setup_locals(struct ac_nir_context *ctx, struct nir_function *func) +{ + int i, j; + ctx->num_locals = 0; + nir_foreach_function_temp_variable(variable, func->impl) + { + unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); + variable->data.driver_location = ctx->num_locals * 4; + variable->data.location_frac = 0; + ctx->num_locals += attrib_count; + } + ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef)); + if (!ctx->locals) + return; + + for (i = 0; i < ctx->num_locals; i++) { + for (j = 0; j < 4; j++) { + ctx->locals[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp"); + } + } +} + +static void setup_scratch(struct ac_nir_context *ctx, struct nir_shader *shader) +{ + if (shader->scratch_size == 0) + return; + + ctx->scratch = + ac_build_alloca_undef(&ctx->ac, LLVMArrayType(ctx->ac.i8, shader->scratch_size), "scratch"); +} + +static 
void setup_constant_data(struct ac_nir_context *ctx, struct nir_shader *shader) +{ + if (!shader->constant_data) + return; + + LLVMValueRef data = LLVMConstStringInContext(ctx->ac.context, shader->constant_data, + shader->constant_data_size, true); + LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, shader->constant_data_size); + + /* We want to put the constant data in the CONST address space so that + * we can use scalar loads. However, LLVM versions before 10 put these + * variables in the same section as the code, which is unacceptable + * for RadeonSI as it needs to relocate all the data sections after + * the code sections. See https://reviews.llvm.org/D65813. + */ + unsigned address_space = LLVM_VERSION_MAJOR < 10 ? AC_ADDR_SPACE_GLOBAL : AC_ADDR_SPACE_CONST; + + LLVMValueRef global = + LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "const_data", address_space); + + LLVMSetInitializer(global, data); + LLVMSetGlobalConstant(global, true); + LLVMSetVisibility(global, LLVMHiddenVisibility); + ctx->constant_data = global; +} + +static void setup_shared(struct ac_nir_context *ctx, struct nir_shader *nir) +{ + if (ctx->ac.lds) + return; + + LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.cs.shared_size); + + LLVMValueRef lds = + LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "compute_lds", AC_ADDR_SPACE_LDS); + LLVMSetAlignment(lds, 64 * 1024); + + ctx->ac.lds = + LLVMBuildBitCast(ctx->ac.builder, lds, LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS), ""); } void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, - const struct ac_shader_args *args, struct nir_shader *nir) + const struct ac_shader_args *args, struct nir_shader *nir) { - struct ac_nir_context ctx = {}; - struct nir_function *func; + struct ac_nir_context ctx = {}; + struct nir_function *func; - ctx.ac = *ac; - ctx.abi = abi; - ctx.args = args; - - ctx.stage = nir->info.stage; - ctx.info = &nir->info; - - ctx.main_function = 
LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); - - nir_foreach_shader_out_variable(variable, nir) - ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable, - ctx.stage); - - ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - if (ctx.abi->kill_ps_if_inf_interp) - ctx.verified_interp = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - func = (struct nir_function *)exec_list_get_head(&nir->functions); - - nir_index_ssa_defs(func->impl); - ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef)); - - setup_locals(&ctx, func); - setup_scratch(&ctx, nir); - setup_constant_data(&ctx, nir); - - if (gl_shader_stage_is_compute(nir->info.stage)) - setup_shared(&ctx, nir); - - if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_demote) { - ctx.ac.postponed_kill = ac_build_alloca_undef(&ctx.ac, ac->i1, ""); - /* true = don't kill. */ - LLVMBuildStore(ctx.ac.builder, ctx.ac.i1true, ctx.ac.postponed_kill); - } - - visit_cf_list(&ctx, &func->impl->body); - phi_post_pass(&ctx); - - if (ctx.ac.postponed_kill) - ac_build_kill_if_false(&ctx.ac, LLVMBuildLoad(ctx.ac.builder, - ctx.ac.postponed_kill, "")); - - if (!gl_shader_stage_is_compute(nir->info.stage)) - ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS, - ctx.abi->outputs); - - free(ctx.locals); - free(ctx.ssa_defs); - ralloc_free(ctx.defs); - ralloc_free(ctx.phis); - ralloc_free(ctx.vars); - if (ctx.abi->kill_ps_if_inf_interp) - ralloc_free(ctx.verified_interp); -} - -bool -ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) -{ - bool progress = false; - - /* Lower large variables to scratch first so that we won't bloat the - * shader by generating large if ladders for them. 
We later lower - * scratch to alloca's, assuming LLVM won't generate VGPR indexing. - */ - NIR_PASS(progress, nir, nir_lower_vars_to_scratch, - nir_var_function_temp, - 256, - glsl_get_natural_size_align_bytes); - - /* While it would be nice not to have this flag, we are constrained - * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9. - */ - bool llvm_has_working_vgpr_indexing = chip_class != GFX9; - - /* TODO: Indirect indexing of GS inputs is unimplemented. - * - * TCS and TES load inputs directly from LDS or offchip memory, so - * indirect indexing is trivial. - */ - nir_variable_mode indirect_mask = 0; - if (nir->info.stage == MESA_SHADER_GEOMETRY || - (nir->info.stage != MESA_SHADER_TESS_CTRL && - nir->info.stage != MESA_SHADER_TESS_EVAL && - !llvm_has_working_vgpr_indexing)) { - indirect_mask |= nir_var_shader_in; - } - if (!llvm_has_working_vgpr_indexing && - nir->info.stage != MESA_SHADER_TESS_CTRL) - indirect_mask |= nir_var_shader_out; - - /* TODO: We shouldn't need to do this, however LLVM isn't currently - * smart enough to handle indirects without causing excess spilling - * causing the gpu to hang. 
- * - * See the following thread for more details of the problem: - * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html - */ - indirect_mask |= nir_var_function_temp; - - progress |= nir_lower_indirect_derefs(nir, indirect_mask); - return progress; -} - -static unsigned -get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin) -{ - if (intrin->intrinsic != nir_intrinsic_store_deref) - return 0; - - nir_variable *var = - nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0])); - - if (var->data.mode != nir_var_shader_out) - return 0; - - unsigned writemask = 0; - const int location = var->data.location; - unsigned first_component = var->data.location_frac; - unsigned num_comps = intrin->dest.ssa.num_components; - - if (location == VARYING_SLOT_TESS_LEVEL_INNER) - writemask = ((1 << (num_comps + 1)) - 1) << first_component; - else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) - writemask = (((1 << (num_comps + 1)) - 1) << first_component) << 4; - - return writemask; -} - -static void -scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask, - unsigned *cond_block_tf_writemask, - bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf) -{ - switch (cf_node->type) { - case nir_cf_node_block: { - nir_block *block = nir_cf_node_as_block(cf_node); - nir_foreach_instr(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic == nir_intrinsic_control_barrier) { - - /* If we find a barrier in nested control flow put this in the - * too hard basket. In GLSL this is not possible but it is in - * SPIR-V. 
- */ - if (is_nested_cf) { - *tessfactors_are_def_in_all_invocs = false; - return; - } - - /* The following case must be prevented: - * gl_TessLevelInner = ...; - * barrier(); - * if (gl_InvocationID == 1) - * gl_TessLevelInner = ...; - * - * If you consider disjoint code segments separated by barriers, each - * such segment that writes tess factor channels should write the same - * channels in all codepaths within that segment. - */ - if (upper_block_tf_writemask || cond_block_tf_writemask) { - /* Accumulate the result: */ - *tessfactors_are_def_in_all_invocs &= - !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask)); - - /* Analyze the next code segment from scratch. */ - *upper_block_tf_writemask = 0; - *cond_block_tf_writemask = 0; - } - } else - *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin); - } - - break; - } - case nir_cf_node_if: { - unsigned then_tessfactor_writemask = 0; - unsigned else_tessfactor_writemask = 0; - - nir_if *if_stmt = nir_cf_node_as_if(cf_node); - foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) { - scan_tess_ctrl(nested_node, &then_tessfactor_writemask, - cond_block_tf_writemask, - tessfactors_are_def_in_all_invocs, true); - } - - foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) { - scan_tess_ctrl(nested_node, &else_tessfactor_writemask, - cond_block_tf_writemask, - tessfactors_are_def_in_all_invocs, true); - } - - if (then_tessfactor_writemask || else_tessfactor_writemask) { - /* If both statements write the same tess factor channels, - * we can say that the upper block writes them too. 
- */ - *upper_block_tf_writemask |= then_tessfactor_writemask & - else_tessfactor_writemask; - *cond_block_tf_writemask |= then_tessfactor_writemask | - else_tessfactor_writemask; - } - - break; - } - case nir_cf_node_loop: { - nir_loop *loop = nir_cf_node_as_loop(cf_node); - foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) { - scan_tess_ctrl(nested_node, cond_block_tf_writemask, - cond_block_tf_writemask, - tessfactors_are_def_in_all_invocs, true); - } - - break; - } - default: - unreachable("unknown cf node type"); - } -} - -bool -ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir) -{ - assert(nir->info.stage == MESA_SHADER_TESS_CTRL); - - /* The pass works as follows: - * If all codepaths write tess factors, we can say that all - * invocations define tess factors. - * - * Each tess factor channel is tracked separately. - */ - unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */ - unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */ - - /* Initial value = true. Here the pass will accumulate results from - * multiple segments surrounded by barriers. If tess factors aren't - * written at all, it's a shader bug and we don't care if this will be - * true. - */ - bool tessfactors_are_def_in_all_invocs = true; - - nir_foreach_function(function, nir) { - if (function->impl) { - foreach_list_typed(nir_cf_node, node, node, &function->impl->body) { - scan_tess_ctrl(node, &main_block_tf_writemask, - &cond_block_tf_writemask, - &tessfactors_are_def_in_all_invocs, - false); - } - } - } - - /* Accumulate the result for the last code segment separated by a - * barrier. 
- */ - if (main_block_tf_writemask || cond_block_tf_writemask) { - tessfactors_are_def_in_all_invocs &= - !(cond_block_tf_writemask & ~main_block_tf_writemask); - } + ctx.ac = *ac; + ctx.abi = abi; + ctx.args = args; + + ctx.stage = nir->info.stage; + ctx.info = &nir->info; + + ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); + + nir_foreach_shader_out_variable(variable, nir) + ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable, ctx.stage); + + ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + + if (ctx.abi->kill_ps_if_inf_interp) + ctx.verified_interp = + _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + + func = (struct nir_function *)exec_list_get_head(&nir->functions); + + nir_index_ssa_defs(func->impl); + ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef)); + + setup_locals(&ctx, func); + setup_scratch(&ctx, nir); + setup_constant_data(&ctx, nir); + + if (gl_shader_stage_is_compute(nir->info.stage)) + setup_shared(&ctx, nir); + + if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_demote) { + ctx.ac.postponed_kill = ac_build_alloca_undef(&ctx.ac, ac->i1, ""); + /* true = don't kill. 
*/ + LLVMBuildStore(ctx.ac.builder, ctx.ac.i1true, ctx.ac.postponed_kill); + } + + visit_cf_list(&ctx, &func->impl->body); + phi_post_pass(&ctx); + + if (ctx.ac.postponed_kill) + ac_build_kill_if_false(&ctx.ac, LLVMBuildLoad(ctx.ac.builder, ctx.ac.postponed_kill, "")); + + if (!gl_shader_stage_is_compute(nir->info.stage)) + ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS, ctx.abi->outputs); + + free(ctx.locals); + free(ctx.ssa_defs); + ralloc_free(ctx.defs); + ralloc_free(ctx.phis); + ralloc_free(ctx.vars); + if (ctx.abi->kill_ps_if_inf_interp) + ralloc_free(ctx.verified_interp); +} + +bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) +{ + bool progress = false; + + /* Lower large variables to scratch first so that we won't bloat the + * shader by generating large if ladders for them. We later lower + * scratch to alloca's, assuming LLVM won't generate VGPR indexing. + */ + NIR_PASS(progress, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256, + glsl_get_natural_size_align_bytes); + + /* While it would be nice not to have this flag, we are constrained + * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9. + */ + bool llvm_has_working_vgpr_indexing = chip_class != GFX9; + + /* TODO: Indirect indexing of GS inputs is unimplemented. + * + * TCS and TES load inputs directly from LDS or offchip memory, so + * indirect indexing is trivial. + */ + nir_variable_mode indirect_mask = 0; + if (nir->info.stage == MESA_SHADER_GEOMETRY || + (nir->info.stage != MESA_SHADER_TESS_CTRL && nir->info.stage != MESA_SHADER_TESS_EVAL && + !llvm_has_working_vgpr_indexing)) { + indirect_mask |= nir_var_shader_in; + } + if (!llvm_has_working_vgpr_indexing && nir->info.stage != MESA_SHADER_TESS_CTRL) + indirect_mask |= nir_var_shader_out; + + /* TODO: We shouldn't need to do this, however LLVM isn't currently + * smart enough to handle indirects without causing excess spilling + * causing the gpu to hang. 
+ * + * See the following thread for more details of the problem: + * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html + */ + indirect_mask |= nir_var_function_temp; + + progress |= nir_lower_indirect_derefs(nir, indirect_mask); + return progress; +} + +static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin) +{ + if (intrin->intrinsic != nir_intrinsic_store_deref) + return 0; + + nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0])); + + if (var->data.mode != nir_var_shader_out) + return 0; + + unsigned writemask = 0; + const int location = var->data.location; + unsigned first_component = var->data.location_frac; + unsigned num_comps = intrin->dest.ssa.num_components; + + if (location == VARYING_SLOT_TESS_LEVEL_INNER) + writemask = ((1 << (num_comps + 1)) - 1) << first_component; + else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) + writemask = (((1 << (num_comps + 1)) - 1) << first_component) << 4; + + return writemask; +} + +static void scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask, + unsigned *cond_block_tf_writemask, + bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf) +{ + switch (cf_node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(cf_node); + nir_foreach_instr (instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_control_barrier) { + + /* If we find a barrier in nested control flow put this in the + * too hard basket. In GLSL this is not possible but it is in + * SPIR-V. 
+ */ + if (is_nested_cf) { + *tessfactors_are_def_in_all_invocs = false; + return; + } + + /* The following case must be prevented: + * gl_TessLevelInner = ...; + * barrier(); + * if (gl_InvocationID == 1) + * gl_TessLevelInner = ...; + * + * If you consider disjoint code segments separated by barriers, each + * such segment that writes tess factor channels should write the same + * channels in all codepaths within that segment. + */ + if (*upper_block_tf_writemask || *cond_block_tf_writemask) { + /* Accumulate the result: */ + *tessfactors_are_def_in_all_invocs &= + !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask)); + + /* Analyze the next code segment from scratch. */ + *upper_block_tf_writemask = 0; + *cond_block_tf_writemask = 0; + } + } else + *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin); + } + + break; + } + case nir_cf_node_if: { + unsigned then_tessfactor_writemask = 0; + unsigned else_tessfactor_writemask = 0; + + nir_if *if_stmt = nir_cf_node_as_if(cf_node); + foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) + { + scan_tess_ctrl(nested_node, &then_tessfactor_writemask, cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) + { + scan_tess_ctrl(nested_node, &else_tessfactor_writemask, cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + if (then_tessfactor_writemask || else_tessfactor_writemask) { + /* If both statements write the same tess factor channels, + * we can say that the upper block writes them too. 
+ */ + *upper_block_tf_writemask |= then_tessfactor_writemask & else_tessfactor_writemask; + *cond_block_tf_writemask |= then_tessfactor_writemask | else_tessfactor_writemask; + } + + break; + } + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(cf_node); + foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) + { + scan_tess_ctrl(nested_node, cond_block_tf_writemask, cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + break; + } + default: + unreachable("unknown cf node type"); + } +} + +bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir) +{ + assert(nir->info.stage == MESA_SHADER_TESS_CTRL); + + /* The pass works as follows: + * If all codepaths write tess factors, we can say that all + * invocations define tess factors. + * + * Each tess factor channel is tracked separately. + */ + unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */ + unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */ + + /* Initial value = true. Here the pass will accumulate results from + * multiple segments surrounded by barriers. If tess factors aren't + * written at all, it's a shader bug and we don't care if this will be + * true. + */ + bool tessfactors_are_def_in_all_invocs = true; + + nir_foreach_function (function, nir) { + if (function->impl) { + foreach_list_typed(nir_cf_node, node, node, &function->impl->body) + { + scan_tess_ctrl(node, &main_block_tf_writemask, &cond_block_tf_writemask, + &tessfactors_are_def_in_all_invocs, false); + } + } + } + + /* Accumulate the result for the last code segment separated by a + * barrier. 
+ */ + if (main_block_tf_writemask || cond_block_tf_writemask) { + tessfactors_are_def_in_all_invocs &= !(cond_block_tf_writemask & ~main_block_tf_writemask); + } - return tessfactors_are_def_in_all_invocs; + return tessfactors_are_def_in_all_invocs; } diff -Nru mesa-20.2.1/src/amd/llvm/ac_nir_to_llvm.h mesa-20.2.6/src/amd/llvm/ac_nir_to_llvm.h --- mesa-20.2.1/src/amd/llvm/ac_nir_to_llvm.h 2020-10-14 17:19:10.296516200 +0000 +++ mesa-20.2.6/src/amd/llvm/ac_nir_to_llvm.h 2020-12-16 21:42:03.544110000 +0000 @@ -24,11 +24,12 @@ #ifndef AC_NIR_TO_LLVM_H #define AC_NIR_TO_LLVM_H -#include -#include "llvm-c/Core.h" -#include "llvm-c/TargetMachine.h" #include "amd_family.h" #include "compiler/shader_enums.h" +#include "llvm-c/Core.h" +#include "llvm-c/TargetMachine.h" + +#include struct nir_shader; struct nir_variable; @@ -37,13 +38,13 @@ struct ac_shader_args; /* Interpolation locations */ -#define INTERP_CENTER 0 +#define INTERP_CENTER 0 #define INTERP_CENTROID 1 -#define INTERP_SAMPLE 2 +#define INTERP_SAMPLE 2 static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan) { - return (index * 4) + chan; + return (index * 4) + chan; } bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class); @@ -51,14 +52,11 @@ bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir); void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, - const struct ac_shader_args *args, struct nir_shader *nir); + const struct ac_shader_args *args, struct nir_shader *nir); -void -ac_handle_shader_output_decl(struct ac_llvm_context *ctx, - struct ac_shader_abi *abi, - struct nir_shader *nir, - struct nir_variable *variable, - gl_shader_stage stage); +void ac_handle_shader_output_decl(struct ac_llvm_context *ctx, struct ac_shader_abi *abi, + struct nir_shader *nir, struct nir_variable *variable, + gl_shader_stage stage); void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage); diff -Nru 
mesa-20.2.1/src/amd/llvm/ac_shader_abi.h mesa-20.2.6/src/amd/llvm/ac_shader_abi.h --- mesa-20.2.1/src/amd/llvm/ac_shader_abi.h 2020-10-14 17:19:10.296516200 +0000 +++ mesa-20.2.6/src/amd/llvm/ac_shader_abi.h 2020-12-16 21:42:03.544110000 +0000 @@ -24,11 +24,11 @@ #ifndef AC_SHADER_ABI_H #define AC_SHADER_ABI_H -#include -#include #include "ac_shader_args.h" - #include "compiler/shader_enums.h" +#include + +#include struct nir_variable; @@ -36,165 +36,135 @@ #define AC_MAX_INLINE_PUSH_CONSTS 8 -enum ac_descriptor_type { - AC_DESC_IMAGE, - AC_DESC_FMASK, - AC_DESC_SAMPLER, - AC_DESC_BUFFER, - AC_DESC_PLANE_0, - AC_DESC_PLANE_1, - AC_DESC_PLANE_2, +enum ac_descriptor_type +{ + AC_DESC_IMAGE, + AC_DESC_FMASK, + AC_DESC_SAMPLER, + AC_DESC_BUFFER, + AC_DESC_PLANE_0, + AC_DESC_PLANE_1, + AC_DESC_PLANE_2, }; /* Document the shader ABI during compilation. This is what allows radeonsi and * radv to share a compiler backend. */ struct ac_shader_abi { - LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4]; + LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4]; - /* These input registers sometimes need to be fixed up. */ - LLVMValueRef vertex_id; - LLVMValueRef instance_id; - LLVMValueRef persp_centroid, linear_centroid; - LLVMValueRef color0, color1; - LLVMValueRef user_data; - - /* For VS and PS: pre-loaded shader inputs. - * - * Currently only used for NIR shaders; indexed by variables' - * driver_location. - */ - LLVMValueRef *inputs; - - /* Varying -> attribute number mapping. 
Also NIR-only */ - unsigned fs_input_attr_indices[MAX_VARYING]; - - void (*emit_outputs)(struct ac_shader_abi *abi, - unsigned max_outputs, - LLVMValueRef *addrs); - - void (*emit_vertex)(struct ac_shader_abi *abi, - unsigned stream, - LLVMValueRef *addrs); - - void (*emit_primitive)(struct ac_shader_abi *abi, - unsigned stream); - - void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, - unsigned stream, - LLVMValueRef vertexidx, - LLVMValueRef *addrs); - - LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi, - unsigned location, - unsigned driver_location, - unsigned component, - unsigned num_components, - unsigned vertex_index, - unsigned const_index, - LLVMTypeRef type); - - LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, - LLVMTypeRef type, - LLVMValueRef vertex_index, - LLVMValueRef param_index, - unsigned const_index, - unsigned location, - unsigned driver_location, - unsigned component, - unsigned num_components, - bool is_patch, - bool is_compact, - bool load_inputs); - - void (*store_tcs_outputs)(struct ac_shader_abi *abi, - const struct nir_variable *var, - LLVMValueRef vertex_index, - LLVMValueRef param_index, - unsigned const_index, - LLVMValueRef src, - unsigned writemask); - - LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi); - - LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi); - - LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi, - unsigned varying_id, - bool load_default_state); - - - LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index); - - /** - * Load the descriptor for the given buffer. - * - * \param buffer the buffer as presented in NIR: this is the descriptor - * in Vulkan, and the buffer index in OpenGL/Gallium - * \param write whether buffer contents will be written - */ - LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi, - LLVMValueRef buffer, bool write); - - /** - * Load a descriptor associated to a sampler. 
- * - * \param descriptor_set the descriptor set index (only for Vulkan) - * \param base_index the base index of the sampler variable - * \param constant_index constant part of an array index (or 0, if the - * sampler variable is not an array) - * \param index non-constant part of an array index (may be NULL) - * \param desc_type the type of descriptor to load - * \param image whether the descriptor is loaded for an image operation - */ - LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi, - unsigned descriptor_set, - unsigned base_index, - unsigned constant_index, - LLVMValueRef index, - enum ac_descriptor_type desc_type, - bool image, bool write, - bool bindless); - - /** - * Load a Vulkan-specific resource. - * - * \param index resource index - * \param desc_set descriptor set - * \param binding descriptor set binding - */ - LLVMValueRef (*load_resource)(struct ac_shader_abi *abi, - LLVMValueRef index, - unsigned desc_set, - unsigned binding); - - LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi, - LLVMValueRef sample_id); - - LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi); - - LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi); - - LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi); - - LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi); - - /* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently - * uses it due to promoting D16 to D32, but radv needs it off. */ - bool clamp_shadow_reference; - bool interp_at_sample_force_center; - - /* Whether bounds checks are required */ - bool robust_buffer_access; + /* These input registers sometimes need to be fixed up. */ + LLVMValueRef vertex_id; + LLVMValueRef instance_id; + LLVMValueRef persp_centroid, linear_centroid; + LLVMValueRef color0, color1; + LLVMValueRef user_data; + + /* For VS and PS: pre-loaded shader inputs. + * + * Currently only used for NIR shaders; indexed by variables' + * driver_location. 
+ */ + LLVMValueRef *inputs; + + /* Varying -> attribute number mapping. Also NIR-only */ + unsigned fs_input_attr_indices[MAX_VARYING]; + + void (*emit_outputs)(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs); + + void (*emit_vertex)(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs); + + void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream); + + void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream, + LLVMValueRef vertexidx, LLVMValueRef *addrs); + + LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi, unsigned location, + unsigned driver_location, unsigned component, + unsigned num_components, unsigned vertex_index, unsigned const_index, + LLVMTypeRef type); + + LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type, + LLVMValueRef vertex_index, LLVMValueRef param_index, + unsigned const_index, unsigned location, + unsigned driver_location, unsigned component, + unsigned num_components, bool is_patch, bool is_compact, + bool load_inputs); + + void (*store_tcs_outputs)(struct ac_shader_abi *abi, const struct nir_variable *var, + LLVMValueRef vertex_index, LLVMValueRef param_index, + unsigned const_index, LLVMValueRef src, unsigned writemask); + + LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi); + + LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi); + + LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi, unsigned varying_id, + bool load_default_state); + + LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index); + + /** + * Load the descriptor for the given buffer. + * + * \param buffer the buffer as presented in NIR: this is the descriptor + * in Vulkan, and the buffer index in OpenGL/Gallium + * \param write whether buffer contents will be written + */ + LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi, LLVMValueRef buffer, bool write); + + /** + * Load a descriptor associated to a sampler. 
+ * + * \param descriptor_set the descriptor set index (only for Vulkan) + * \param base_index the base index of the sampler variable + * \param constant_index constant part of an array index (or 0, if the + * sampler variable is not an array) + * \param index non-constant part of an array index (may be NULL) + * \param desc_type the type of descriptor to load + * \param image whether the descriptor is loaded for an image operation + */ + LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi, unsigned descriptor_set, + unsigned base_index, unsigned constant_index, + LLVMValueRef index, enum ac_descriptor_type desc_type, + bool image, bool write, bool bindless); + + /** + * Load a Vulkan-specific resource. + * + * \param index resource index + * \param desc_set descriptor set + * \param binding descriptor set binding + */ + LLVMValueRef (*load_resource)(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_set, + unsigned binding); + + LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi, LLVMValueRef sample_id); + + LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi); + + LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi); + + LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi); + + LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi); + + /* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently + * uses it due to promoting D16 to D32, but radv needs it off. 
*/ + bool clamp_shadow_reference; + bool interp_at_sample_force_center; + + /* Whether bounds checks are required */ + bool robust_buffer_access; - /* Check for Inf interpolation coeff */ - bool kill_ps_if_inf_interp; + /* Check for Inf interpolation coeff */ + bool kill_ps_if_inf_interp; - /* Whether undef values must be converted to zero */ - bool convert_undef_to_zero; + /* Whether undef values must be converted to zero */ + bool convert_undef_to_zero; - /* Clamp div by 0 (so it won't produce NaN) */ - bool clamp_div_by_zero; + /* Clamp div by 0 (so it won't produce NaN) */ + bool clamp_div_by_zero; }; #endif /* AC_SHADER_ABI_H */ diff -Nru mesa-20.2.1/src/amd/vulkan/radv_cmd_buffer.c mesa-20.2.6/src/amd/vulkan/radv_cmd_buffer.c --- mesa-20.2.1/src/amd/vulkan/radv_cmd_buffer.c 2020-10-14 17:19:10.299849500 +0000 +++ mesa-20.2.6/src/amd/vulkan/radv_cmd_buffer.c 2020-12-16 21:42:03.556110000 +0000 @@ -2222,6 +2222,74 @@ } } +/* GFX9+ metadata cache flushing workaround. metadata cache coherency is + * broken if the CB caches data of multiple mips of the same image at the + * same time. + * + * Insert some flushes to avoid this. 
+ */ +static void +radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer) +{ + struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; + const struct radv_subpass *subpass = cmd_buffer->state.subpass; + bool color_mip_changed = false; + + /* Entire workaround is not applicable before GFX9 */ + if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9) + return; + + if (!framebuffer) + return; + + for (int i = 0; i < subpass->color_count; ++i) { + int idx = subpass->color_attachments[i].attachment; + if (idx == VK_ATTACHMENT_UNUSED) + continue; + + struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview; + + if ((radv_image_has_CB_metadata(iview->image) || + radv_image_has_dcc(iview->image)) && + cmd_buffer->state.cb_mip[i] != iview->base_mip) + color_mip_changed = true; + + cmd_buffer->state.cb_mip[i] = iview->base_mip; + } + + if (color_mip_changed) { + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + } +} + +/* This function does the flushes for mip changes if the levels are not zero for + * all render targets. This way we can assume at the start of the next cmd_buffer + * that rendering to mip 0 doesn't need any flushes. As that is the most common + * case that saves some flushes. 
*/ +static void +radv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer) +{ + /* Entire workaround is not applicable before GFX9 */ + if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9) + return; + + bool need_color_mip_flush = false; + for (unsigned i = 0; i < 8; ++i) { + if (cmd_buffer->state.cb_mip[i]) { + need_color_mip_flush = true; + break; + } + } + + if (need_color_mip_flush) { + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + } + + memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip)); +} + static void radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) { @@ -3842,7 +3910,6 @@ radv_set_descriptor_set(cmd_buffer, bind_point, set, idx); assert(set); - assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); if (!cmd_buffer->device->use_global_bo_list) { for (unsigned j = 0; j < set->buffer_count; ++j) @@ -3873,17 +3940,17 @@ radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); for (unsigned i = 0; i < descriptorSetCount; ++i) { - unsigned idx = i + firstSet; + unsigned set_idx = i + firstSet; RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]); /* If the set is already bound we only need to update the * (potentially changed) dynamic offsets. 
*/ - if (descriptors_state->sets[idx] != set || - !(descriptors_state->valid & (1u << idx))) { - radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, idx); + if (descriptors_state->sets[set_idx] != set || + !(descriptors_state->valid & (1u << set_idx))) { + radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx); } - for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) { + for(unsigned j = 0; j < layout->set[set_idx].dynamic_offset_count; ++j, ++dyn_idx) { unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start; uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4; assert(dyn_idx < dynamicOffsetCount); @@ -3912,8 +3979,7 @@ } } - cmd_buffer->push_constant_stages |= - set->layout->dynamic_shader_stages; + cmd_buffer->push_constant_stages |= layout->set[set_idx].dynamic_offset_stages; } } } @@ -4063,6 +4129,8 @@ { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + radv_emit_mip_change_flush_default(cmd_buffer); + if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) { if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6) cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2; @@ -4173,14 +4241,15 @@ /* Prefetch all pipeline shaders at first draw time. */ cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS; - if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX10 && + if ((cmd_buffer->device->physical_device->rad_info.chip_class == GFX10 || + cmd_buffer->device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID) && cmd_buffer->state.emitted_pipeline && radv_pipeline_has_ngg(cmd_buffer->state.emitted_pipeline) && !radv_pipeline_has_ngg(cmd_buffer->state.pipeline)) { /* Transitioning from NGG to legacy GS requires - * VGT_FLUSH on Navi10-14. VGT_FLUSH is also emitted - * at the beginning of IBs when legacy GS ring pointers - * are set. + * VGT_FLUSH on GFX10 and Sienna Cichlid. 
VGT_FLUSH + * is also emitted at the beginning of IBs when legacy + * GS ring pointers are set. */ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH; } @@ -4642,6 +4711,8 @@ assert(commandBufferCount > 0); + radv_emit_mip_change_flush_default(primary); + /* Emit pending flushes on primary prior to executing secondary */ si_emit_cache_flush(primary); @@ -4674,6 +4745,7 @@ * has been recorded without a framebuffer, otherwise * fast color/depth clears can't work. */ + radv_emit_fb_mip_change_flush(primary); radv_emit_framebuffer_state(primary); } @@ -5281,6 +5353,10 @@ radv_describe_draw(cmd_buffer); + /* Need to apply this workaround early as it can set flush flags. */ + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) + radv_emit_fb_mip_change_flush(cmd_buffer); + /* Use optimal packet order based on whether we need to sync the * pipeline. */ diff -Nru mesa-20.2.1/src/amd/vulkan/radv_descriptor_set.c mesa-20.2.6/src/amd/vulkan/radv_descriptor_set.c --- mesa-20.2.1/src/amd/vulkan/radv_descriptor_set.c 2020-10-14 17:19:10.299849500 +0000 +++ mesa-20.2.6/src/amd/vulkan/radv_descriptor_set.c 2020-12-16 21:42:03.556110000 +0000 @@ -432,10 +432,16 @@ layout->set[set].layout = set_layout; layout->set[set].dynamic_offset_start = dynamic_offset_count; + layout->set[set].dynamic_offset_count = 0; + layout->set[set].dynamic_offset_stages = 0; + for (uint32_t b = 0; b < set_layout->binding_count; b++) { - dynamic_offset_count += set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count; - dynamic_shader_stages |= set_layout->dynamic_shader_stages; + layout->set[set].dynamic_offset_count += + set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count; + layout->set[set].dynamic_offset_stages |= set_layout->dynamic_shader_stages; } + dynamic_offset_count += layout->set[set].dynamic_offset_count; + dynamic_shader_stages |= layout->set[set].dynamic_offset_stages; _mesa_sha1_update(&ctx, set_layout, set_layout->layout_size); 
} diff -Nru mesa-20.2.1/src/amd/vulkan/radv_descriptor_set.h mesa-20.2.6/src/amd/vulkan/radv_descriptor_set.h --- mesa-20.2.1/src/amd/vulkan/radv_descriptor_set.h 2020-10-14 17:19:10.299849500 +0000 +++ mesa-20.2.6/src/amd/vulkan/radv_descriptor_set.h 2020-12-16 21:42:03.556110000 +0000 @@ -89,7 +89,9 @@ struct { struct radv_descriptor_set_layout *layout; uint32_t size; - uint32_t dynamic_offset_start; + uint16_t dynamic_offset_start; + uint16_t dynamic_offset_count; + VkShaderStageFlags dynamic_offset_stages; } set[MAX_SETS]; uint32_t num_sets; diff -Nru mesa-20.2.1/src/amd/vulkan/radv_device.c mesa-20.2.6/src/amd/vulkan/radv_device.c --- mesa-20.2.1/src/amd/vulkan/radv_device.c 2020-10-14 17:19:10.299849500 +0000 +++ mesa-20.2.6/src/amd/vulkan/radv_device.c 2020-12-16 21:42:03.557110000 +0000 @@ -139,6 +139,13 @@ return device->rad_info.vram_size - radv_get_visible_vram_size(device); } +enum radv_heap { + RADV_HEAP_VRAM = 1 << 0, + RADV_HEAP_GTT = 1 << 1, + RADV_HEAP_VRAM_VIS = 1 << 2, + RADV_HEAP_MAX = 1 << 3, +}; + static void radv_physical_device_init_mem_types(struct radv_physical_device *device) { @@ -146,8 +153,13 @@ uint64_t vram_size = radv_get_vram_size(device); int vram_index = -1, visible_vram_index = -1, gart_index = -1; device->memory_properties.memoryHeapCount = 0; - if (vram_size > 0) { + device->heaps = 0; + + /* Only get a VRAM heap if it is significant, not if it is a 16 MiB + * remainder above visible VRAM. 
*/ + if (vram_size > 0 && vram_size * 9 >= visible_vram_size) { vram_index = device->memory_properties.memoryHeapCount++; + device->heaps |= RADV_HEAP_VRAM; device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) { .size = vram_size, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, @@ -156,6 +168,7 @@ if (device->rad_info.gart_size > 0) { gart_index = device->memory_properties.memoryHeapCount++; + device->heaps |= RADV_HEAP_GTT; device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) { .size = device->rad_info.gart_size, .flags = 0, @@ -164,6 +177,7 @@ if (visible_vram_size) { visible_vram_index = device->memory_properties.memoryHeapCount++; + device->heaps |= RADV_HEAP_VRAM_VIS; device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) { .size = visible_vram_size, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, @@ -370,7 +384,8 @@ disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2); device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags); - if (device->rad_info.chip_class < GFX8) + if (device->rad_info.chip_class < GFX8 || + device->rad_info.chip_class > GFX10) fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); radv_get_driver_uuid(&device->driver_uuid); @@ -939,7 +954,7 @@ .depthBounds = true, .wideLines = true, .largePoints = true, - .alphaToOne = true, + .alphaToOne = false, .multiViewport = true, .samplerAnisotropy = true, .textureCompressionETC2 = radv_device_supports_etc(pdevice), @@ -2172,10 +2187,6 @@ { RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties; - uint64_t visible_vram_size = radv_get_visible_vram_size(device); - uint64_t vram_size = radv_get_vram_size(device); - uint64_t gtt_size = device->rad_info.gart_size; - uint64_t heap_budget, heap_usage; /* For all memory heaps, the computation of budget is as follow: * heap_budget = heap_size - 
global_heap_usage + app_heap_usage @@ -2186,44 +2197,39 @@ * Note that the application heap usages are not really accurate (eg. * in presence of shared buffers). */ - for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) { - uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex; - - if ((device->memory_domains[i] & RADEON_DOMAIN_VRAM) && (device->memory_flags[i] & RADEON_FLAG_NO_CPU_ACCESS)) { - heap_usage = device->ws->query_value(device->ws, - RADEON_ALLOCATED_VRAM); - - heap_budget = vram_size - - device->ws->query_value(device->ws, RADEON_VRAM_USAGE) + - heap_usage; - - memoryBudget->heapBudget[heap_index] = heap_budget; - memoryBudget->heapUsage[heap_index] = heap_usage; - } else if (device->memory_domains[i] & RADEON_DOMAIN_VRAM) { - heap_usage = device->ws->query_value(device->ws, - RADEON_ALLOCATED_VRAM_VIS); - - heap_budget = visible_vram_size - - device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + - heap_usage; - - memoryBudget->heapBudget[heap_index] = heap_budget; - memoryBudget->heapUsage[heap_index] = heap_usage; - } else { - assert(device->memory_domains[i] & RADEON_DOMAIN_GTT); - - heap_usage = device->ws->query_value(device->ws, - RADEON_ALLOCATED_GTT); - - heap_budget = gtt_size - - device->ws->query_value(device->ws, RADEON_GTT_USAGE) + - heap_usage; - - memoryBudget->heapBudget[heap_index] = heap_budget; - memoryBudget->heapUsage[heap_index] = heap_usage; + unsigned mask = device->heaps; + unsigned heap = 0; + while (mask) { + uint64_t internal_usage = 0, total_usage = 0; + unsigned type = 1u << u_bit_scan(&mask); + + switch(type) { + case RADV_HEAP_VRAM: + internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM); + total_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE); + break; + case RADV_HEAP_VRAM_VIS: + internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS); + if (!(device->heaps & RADV_HEAP_VRAM)) + internal_usage += 
device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM); + total_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE); + break; + case RADV_HEAP_GTT: + internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT); + total_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE); + break; } + + uint64_t free_space = device->memory_properties.memoryHeaps[heap].size - + MIN2(device->memory_properties.memoryHeaps[heap].size, + total_usage); + memoryBudget->heapBudget[heap] = free_space + internal_usage; + memoryBudget->heapUsage[heap] = internal_usage; + ++heap; } + assert(heap == memory_properties->memoryHeapCount); + /* The heapBudget and heapUsage values must be zero for array elements * greater than or equal to * VkPhysicalDeviceMemoryProperties::memoryHeapCount. @@ -2778,6 +2784,12 @@ abort(); } + if (device->physical_device->rad_info.chip_class > GFX10) { + fprintf(stderr, "radv: Thread trace is not supported " + "for that GPU!\n"); + exit(1); + } + /* Default buffer size set to 1MB per SE. 
*/ device->thread_trace_buffer_size = radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 1024 * 1024); @@ -4949,9 +4961,8 @@ { struct radeon_bo_metadata metadata; - if (memory->image) { - if (memory->image->tiling != VK_IMAGE_TILING_LINEAR) - radv_init_metadata(device, memory->image, &metadata); + if (memory->image && memory->image->tiling != VK_IMAGE_TILING_LINEAR) { + radv_init_metadata(device, memory->image, &metadata); device->ws->buffer_set_metadata(memory->bo, &metadata); } @@ -5082,7 +5093,8 @@ } if (mem->image && mem->image->plane_count == 1 && - !vk_format_is_depth_or_stencil(mem->image->vk_format)) { + !vk_format_is_depth_or_stencil(mem->image->vk_format) && + mem->image->info.samples == 1) { struct radeon_bo_metadata metadata; device->ws->buffer_get_metadata(mem->bo, &metadata); diff -Nru mesa-20.2.1/src/amd/vulkan/radv_formats.c mesa-20.2.6/src/amd/vulkan/radv_formats.c --- mesa-20.2.1/src/amd/vulkan/radv_formats.c 2020-10-14 17:19:10.303182800 +0000 +++ mesa-20.2.6/src/amd/vulkan/radv_formats.c 2020-12-16 21:42:03.558110000 +0000 @@ -624,6 +624,7 @@ { return physical_device->rad_info.family == CHIP_VEGA10 || physical_device->rad_info.family == CHIP_RAVEN || + physical_device->rad_info.family == CHIP_RAVEN2 || physical_device->rad_info.family == CHIP_STONEY; } diff -Nru mesa-20.2.1/src/amd/vulkan/radv_image.c mesa-20.2.6/src/amd/vulkan/radv_image.c --- mesa-20.2.1/src/amd/vulkan/radv_image.c 2020-10-14 17:19:10.303182800 +0000 +++ mesa-20.2.6/src/amd/vulkan/radv_image.c 2020-12-16 21:42:03.558110000 +0000 @@ -854,49 +854,51 @@ } /* Initialize the sampler view for FMASK. 
*/ - if (radv_image_has_fmask(image)) { - uint64_t gpu_address = radv_buffer_get_va(image->bo); - uint32_t format; - uint64_t va; - - assert(image->plane_count == 1); - - va = gpu_address + image->offset + image->planes[0].surface.fmask_offset; - - switch (image->info.samples) { - case 2: - format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2; - break; - case 4: - format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4; - break; - case 8: - format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8; - break; - default: - unreachable("invalid nr_samples"); - } + if (fmask_state) { + if (radv_image_has_fmask(image)) { + uint64_t gpu_address = radv_buffer_get_va(image->bo); + uint32_t format; + uint64_t va; + + assert(image->plane_count == 1); + + va = gpu_address + image->offset + image->planes[0].surface.fmask_offset; + + switch (image->info.samples) { + case 2: + format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2; + break; + case 4: + format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4; + break; + case 8: + format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8; + break; + default: + unreachable("invalid nr_samples"); + } - fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle; - fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | - S_00A004_FORMAT(format) | - S_00A004_WIDTH_LO(width - 1); - fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | - S_00A008_HEIGHT(height - 1) | - S_00A008_RESOURCE_LEVEL(1); - fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | - S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | - S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | - S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) | - S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) | - S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false)); - fmask_state[4] = S_00A010_DEPTH(last_layer) | - S_00A010_BASE_ARRAY(first_layer); - fmask_state[5] = 0; - fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1); - fmask_state[7] = 0; - } else if (fmask_state) - memset(fmask_state, 0, 8 * 4); + fmask_state[0] = (va >> 8) 
| image->planes[0].surface.fmask_tile_swizzle; + fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | + S_00A004_FORMAT(format) | + S_00A004_WIDTH_LO(width - 1); + fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | + S_00A008_HEIGHT(height - 1) | + S_00A008_RESOURCE_LEVEL(1); + fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | + S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | + S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | + S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) | + S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) | + S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false)); + fmask_state[4] = S_00A010_DEPTH(last_layer) | + S_00A010_BASE_ARRAY(first_layer); + fmask_state[5] = 0; + fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1); + fmask_state[7] = 0; + } else + memset(fmask_state, 0, 8 * 4); + } } /** @@ -1018,94 +1020,96 @@ } /* Initialize the sampler view for FMASK. */ - if (radv_image_has_fmask(image)) { - uint32_t fmask_format, num_format; - uint64_t gpu_address = radv_buffer_get_va(image->bo); - uint64_t va; - - assert(image->plane_count == 1); - - va = gpu_address + image->offset + image->planes[0].surface.fmask_offset; - - if (device->physical_device->rad_info.chip_class == GFX9) { - fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK; - switch (image->info.samples) { - case 2: - num_format = V_008F14_IMG_FMASK_8_2_2; - break; - case 4: - num_format = V_008F14_IMG_FMASK_8_4_4; - break; - case 8: - num_format = V_008F14_IMG_FMASK_32_8_8; - break; - default: - unreachable("invalid nr_samples"); + if (fmask_state) { + if (radv_image_has_fmask(image)) { + uint32_t fmask_format, num_format; + uint64_t gpu_address = radv_buffer_get_va(image->bo); + uint64_t va; + + assert(image->plane_count == 1); + + va = gpu_address + image->offset + image->planes[0].surface.fmask_offset; + + if (device->physical_device->rad_info.chip_class == GFX9) { + fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK; + switch (image->info.samples) { 
+ case 2: + num_format = V_008F14_IMG_FMASK_8_2_2; + break; + case 4: + num_format = V_008F14_IMG_FMASK_8_4_4; + break; + case 8: + num_format = V_008F14_IMG_FMASK_32_8_8; + break; + default: + unreachable("invalid nr_samples"); + } + } else { + switch (image->info.samples) { + case 2: + fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; + break; + case 4: + fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; + break; + case 8: + fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; + break; + default: + assert(0); + fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; + } + num_format = V_008F14_IMG_NUM_FORMAT_UINT; } - } else { - switch (image->info.samples) { - case 2: - fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; - break; - case 4: - fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; - break; - case 8: - fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; - break; - default: - assert(0); - fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; - } - num_format = V_008F14_IMG_NUM_FORMAT_UINT; - } - fmask_state[0] = va >> 8; - fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle; - fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | - S_008F14_DATA_FORMAT(fmask_format) | - S_008F14_NUM_FORMAT(num_format); - fmask_state[2] = S_008F18_WIDTH(width - 1) | - S_008F18_HEIGHT(height - 1); - fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | - S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | - S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | - S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | - S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false)); - fmask_state[4] = 0; - fmask_state[5] = S_008F24_BASE_ARRAY(first_layer); - fmask_state[6] = 0; - fmask_state[7] = 0; - - if (device->physical_device->rad_info.chip_class == GFX9) { - fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode); - fmask_state[4] |= S_008F20_DEPTH(last_layer) | - S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch); - 
fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | - S_008F24_META_RB_ALIGNED(1); - - if (radv_image_is_tc_compat_cmask(image)) { - va = gpu_address + image->offset + image->planes[0].surface.cmask_offset; - - fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40); - fmask_state[6] |= S_008F28_COMPRESSION_EN(1); - fmask_state[7] |= va >> 8; + fmask_state[0] = va >> 8; + fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle; + fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | + S_008F14_DATA_FORMAT(fmask_format) | + S_008F14_NUM_FORMAT(num_format); + fmask_state[2] = S_008F18_WIDTH(width - 1) | + S_008F18_HEIGHT(height - 1); + fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | + S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | + S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | + S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | + S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false)); + fmask_state[4] = 0; + fmask_state[5] = S_008F24_BASE_ARRAY(first_layer); + fmask_state[6] = 0; + fmask_state[7] = 0; + + if (device->physical_device->rad_info.chip_class == GFX9) { + fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode); + fmask_state[4] |= S_008F20_DEPTH(last_layer) | + S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch); + fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | + S_008F24_META_RB_ALIGNED(1); + + if (radv_image_is_tc_compat_cmask(image)) { + va = gpu_address + image->offset + image->planes[0].surface.cmask_offset; + + fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40); + fmask_state[6] |= S_008F28_COMPRESSION_EN(1); + fmask_state[7] |= va >> 8; + } + } else { + fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index); + fmask_state[4] |= S_008F20_DEPTH(depth - 1) | + S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1); + fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer); + + if (radv_image_is_tc_compat_cmask(image)) { + va 
= gpu_address + image->offset + image->planes[0].surface.cmask_offset; + + fmask_state[6] |= S_008F28_COMPRESSION_EN(1); + fmask_state[7] |= va >> 8; + } } - } else { - fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index); - fmask_state[4] |= S_008F20_DEPTH(depth - 1) | - S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1); - fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer); - - if (radv_image_is_tc_compat_cmask(image)) { - va = gpu_address + image->offset + image->planes[0].surface.cmask_offset; - - fmask_state[6] |= S_008F28_COMPRESSION_EN(1); - fmask_state[7] |= va >> 8; - } - } - } else if (fmask_state) - memset(fmask_state, 0, 8 * 4); + } else + memset(fmask_state, 0, 8 * 4); + } } static void @@ -1597,6 +1601,11 @@ iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT; + iview->base_layer = range->baseArrayLayer; + iview->layer_count = radv_get_layerCount(image, range); + iview->base_mip = range->baseMipLevel; + iview->level_count = radv_get_levelCount(image, range); + iview->vk_format = pCreateInfo->format; /* If the image has an Android external format, pCreateInfo->format will be @@ -1652,22 +1661,44 @@ * * This means that mip2 will be missing texels. * - * Fix it by taking the actual extent addrlib assigned to the base mip level. + * Fix this by calculating the base mip's width and height, then convert + * that, and round it back up to get the level 0 size. Clamp the + * converted size between the original values, and the physical extent + * of the base mipmap. + * + * On GFX10 we have to take care to not go over the physical extent + * of the base mipmap as otherwise the GPU computes a different layout. 
+ * Note that the GPU does use the same base-mip dimensions for both a + * block compatible format and the compressed format, so even if we take + * the plain converted dimensions the physical layout is correct. */ if (device->physical_device->rad_info.chip_class >= GFX9 && - vk_format_is_compressed(image->vk_format) && - !vk_format_is_compressed(iview->vk_format) && - iview->image->info.levels > 1) { - iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width; - iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height; - } + vk_format_is_compressed(image->vk_format) && + !vk_format_is_compressed(iview->vk_format)) { + /* If we have multiple levels in the view we should ideally take the last level, + * but the mip calculation has a max(..., 1) so walking back to the base mip in an + * useful way is hard. */ + if (iview->level_count > 1) { + iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width; + iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height; + } else { + unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel); + unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel); + + lvl_width = round_up_u32(lvl_width * view_bw, img_bw); + lvl_height = round_up_u32(lvl_height * view_bh, img_bh); + + lvl_width <<= range->baseMipLevel; + lvl_height <<= range->baseMipLevel; + + iview->extent.width = CLAMP(lvl_width, iview->extent.width, + iview->image->planes[0].surface.u.gfx9.base_mip_width); + iview->extent.height = CLAMP(lvl_height, iview->extent.height, + iview->image->planes[0].surface.u.gfx9.base_mip_height); + } + } } - iview->base_layer = range->baseArrayLayer; - iview->layer_count = radv_get_layerCount(image, range); - iview->base_mip = range->baseMipLevel; - iview->level_count = radv_get_levelCount(image, range); - bool disable_compression = extra_create_info ? 
extra_create_info->disable_compression: false; for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) { VkFormat format = vk_format_get_plane_format(iview->vk_format, i); diff -Nru mesa-20.2.1/src/amd/vulkan/radv_pipeline.c mesa-20.2.6/src/amd/vulkan/radv_pipeline.c --- mesa-20.2.1/src/amd/vulkan/radv_pipeline.c 2020-10-14 17:19:10.306516200 +0000 +++ mesa-20.2.6/src/amd/vulkan/radv_pipeline.c 2020-12-16 21:42:03.563110000 +0000 @@ -219,6 +219,10 @@ hash_flags |= RADV_HASH_SHADER_GE_WAVE32; if (device->physical_device->use_llvm) hash_flags |= RADV_HASH_SHADER_LLVM; + if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE) + hash_flags |= RADV_HASH_SHADER_DISCARD_TO_DEMOTE; + if (device->instance->enable_mrt_output_nan_fixup) + hash_flags |= RADV_HASH_SHADER_MRT_NAN_FIXUP; return hash_flags; } @@ -551,8 +555,10 @@ /* The output for dual source blending should have the same format as * the first output. */ - if (blend->mrt0_is_dual_src) + if (blend->mrt0_is_dual_src) { + assert(!(col_format >> 4)); col_format |= (col_format & 0xf) << 4; + } blend->spi_shader_col_format = col_format; blend->col_format_is_int8 = is_int8; @@ -680,6 +686,12 @@ if (!att->colorWriteMask) continue; + /* Ignore other blend targets if dual-source blending + * is enabled to prevent wrong behaviour. 
+ */ + if (blend.mrt0_is_dual_src) + continue; + blend.cb_target_mask |= (unsigned)att->colorWriteMask << (4 * i); blend.cb_target_enabled_4bit |= 0xfu << (4 * i); if (!att->blendEnable) { @@ -912,6 +924,21 @@ } static bool +radv_is_state_dynamic(const VkGraphicsPipelineCreateInfo *pCreateInfo, + VkDynamicState state) +{ + if (pCreateInfo->pDynamicState) { + uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; + for (uint32_t i = 0; i < count; i++) { + if (pCreateInfo->pDynamicState->pDynamicStates[i] == state) + return true; + } + } + + return false; +} + +static bool radv_pipeline_has_dynamic_ds_states(const VkGraphicsPipelineCreateInfo *pCreateInfo) { VkDynamicState ds_states[] = { @@ -922,14 +949,9 @@ VK_DYNAMIC_STATE_STENCIL_OP_EXT, }; - if (pCreateInfo->pDynamicState) { - uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; - for (uint32_t i = 0; i < count; i++) { - for (uint32_t j = 0; j < ARRAY_SIZE(ds_states); j++) { - if (pCreateInfo->pDynamicState->pDynamicStates[i] == ds_states[j]) - return true; - } - } + for (uint32_t i = 0; i < ARRAY_SIZE(ds_states); i++) { + if (radv_is_state_dynamic(pCreateInfo, ds_states[i])) + return true; } return false; @@ -1329,11 +1351,13 @@ states &= ~RADV_DYNAMIC_DEPTH_BIAS; if (!pCreateInfo->pDepthStencilState || - !pCreateInfo->pDepthStencilState->depthBoundsTestEnable) + (!pCreateInfo->pDepthStencilState->depthBoundsTestEnable && + !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT))) states &= ~RADV_DYNAMIC_DEPTH_BOUNDS; if (!pCreateInfo->pDepthStencilState || - !pCreateInfo->pDepthStencilState->stencilTestEnable) + (!pCreateInfo->pDepthStencilState->stencilTestEnable && + !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT))) states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK | RADV_DYNAMIC_STENCIL_REFERENCE); @@ -2051,21 +2075,33 @@ (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size); max_esverts = 
MIN2(max_esverts, max_gsprims * max_verts_per_prim); + /* Hardware restriction: minimum value of max_esverts */ + max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim); max_gsprims = align(max_gsprims, wavesize); max_gsprims = MIN2(max_gsprims, max_gsprims_base); - if (gsprim_lds_size) - max_gsprims = MIN2(max_gsprims, - (max_lds_size - max_esverts * esvert_lds_size) / - gsprim_lds_size); + if (gsprim_lds_size) { + /* Don't count unusable vertices to the LDS + * size. Those are vertices above the maximum + * number of vertices that can occur in the + * workgroup, which is e.g. max_gsprims * 3 + * for triangles. + */ + unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim); + max_gsprims = + MIN2(max_gsprims, (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size); + } clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency); assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1); } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims); - } - /* Hardware restriction: minimum value of max_esverts */ - max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim); + /* Verify the restriction. */ + assert(max_esverts >= min_esverts - 1 + max_verts_per_prim); + } else { + /* Hardware restriction: minimum value of max_esverts */ + max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim); + } unsigned max_out_vertices = max_vert_out_per_gs_instance ? gs_info->gs.vertices_out : @@ -2092,7 +2128,10 @@ ngg->prim_amp_factor = prim_amp_factor; ngg->max_vert_out_per_gs_instance = max_vert_out_per_gs_instance; ngg->ngg_emit_size = max_gsprims * gsprim_lds_size; - ngg->esgs_ring_size = 4 * max_esverts * esvert_lds_size; + + /* Don't count unusable vertices. 
*/ + ngg->esgs_ring_size = + MIN2(max_esverts, max_gsprims * max_verts_per_prim) * esvert_lds_size * 4; if (gs_type == MESA_SHADER_GEOMETRY) { ngg->vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4; @@ -2330,6 +2369,7 @@ pCreateInfo->pVertexInputState; const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state = vk_find_struct_const(input_state->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); + bool uses_dynamic_stride = false; struct radv_pipeline_key key; memset(&key, 0, sizeof(key)); @@ -2355,6 +2395,16 @@ } } + if (pCreateInfo->pDynamicState) { + uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; + for (uint32_t i = 0; i < count; i++) { + if (pCreateInfo->pDynamicState->pDynamicStates[i] == VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT) { + uses_dynamic_stride = true; + break; + } + } + } + for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) { const VkVertexInputAttributeDescription *desc = &input_state->pVertexAttributeDescriptions[i]; @@ -2378,7 +2428,26 @@ key.vertex_attribute_formats[location] = data_format | (num_format << 4); key.vertex_attribute_bindings[location] = desc->binding; key.vertex_attribute_offsets[location] = desc->offset; - key.vertex_attribute_strides[location] = radv_get_attrib_stride(input_state, desc->binding); + + if (!uses_dynamic_stride) { + /* From the Vulkan spec 1.2.157: + * + * "If the bound pipeline state object was created + * with the + * VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT + * dynamic state enabled then pStrides[i] specifies + * the distance in bytes between two consecutive + * elements within the corresponding buffer. In this + * case the VkVertexInputBindingDescription::stride + * state from the pipeline state object is ignored." + * + * Make sure the vertex attribute stride is zero to + * avoid computing a wrong offset if it's initialized + * to something else than zero. 
+ */ + key.vertex_attribute_strides[location] = + radv_get_attrib_stride(input_state, desc->binding); + } if (pipeline->device->physical_device->rad_info.chip_class <= GFX8 && pipeline->device->physical_device->rad_info.family != CHIP_STONEY) { diff -Nru mesa-20.2.1/src/amd/vulkan/radv_private.h mesa-20.2.6/src/amd/vulkan/radv_private.h --- mesa-20.2.1/src/amd/vulkan/radv_private.h 2020-10-14 17:19:10.306516200 +0000 +++ mesa-20.2.6/src/amd/vulkan/radv_private.h 2020-12-16 21:42:03.563110000 +0000 @@ -307,6 +307,7 @@ VkPhysicalDeviceMemoryProperties memory_properties; enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES]; enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES]; + unsigned heaps; drmPciBusInfo bus_info; @@ -1332,6 +1333,8 @@ uint32_t current_event_type; uint32_t num_events; uint32_t num_layout_transitions; + + uint8_t cb_mip[MAX_RTS]; }; struct radv_cmd_pool { @@ -1589,6 +1592,8 @@ #define RADV_HASH_SHADER_PS_WAVE32 (1 << 2) #define RADV_HASH_SHADER_GE_WAVE32 (1 << 3) #define RADV_HASH_SHADER_LLVM (1 << 4) +#define RADV_HASH_SHADER_DISCARD_TO_DEMOTE (1 << 5) +#define RADV_HASH_SHADER_MRT_NAN_FIXUP (1 << 6) void radv_hash_shaders(unsigned char *hash, diff -Nru mesa-20.2.1/src/amd/vulkan/radv_shader_args.c mesa-20.2.6/src/amd/vulkan/radv_shader_args.c --- mesa-20.2.1/src/amd/vulkan/radv_shader_args.c 2020-10-14 17:19:10.306516200 +0000 +++ mesa-20.2.6/src/amd/vulkan/radv_shader_args.c 2020-12-16 21:42:03.564110000 +0000 @@ -89,11 +89,16 @@ if (args->shader_info->needs_multiview_view_index || (!args->options->key.vs_common_out.as_es && args->options->key.has_multiview_view_index)) return true; break; - case MESA_SHADER_GEOMETRY: case MESA_SHADER_TESS_CTRL: if (args->shader_info->needs_multiview_view_index) return true; break; + case MESA_SHADER_GEOMETRY: + if (args->shader_info->needs_multiview_view_index || + (args->options->key.vs_common_out.as_ngg && + args->options->key.has_multiview_view_index)) + return true; + break; default: break; } diff 
-Nru mesa-20.2.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c mesa-20.2.6/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c --- mesa-20.2.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c 2020-10-14 17:19:10.309849500 +0000 +++ mesa-20.2.6/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c 2020-12-16 21:42:03.567110000 +0000 @@ -166,6 +166,8 @@ amdgpu_cs_destroy_syncobj(ws->dev, ws->syncobj[i]); free(ws->syncobj); + pthread_mutex_destroy(&ws->syncobj_lock); + pthread_mutex_destroy(&ws->global_bo_list_lock); ac_addrlib_destroy(ws->addrlib); amdgpu_device_deinitialize(ws->dev); FREE(rws); diff -Nru mesa-20.2.1/src/amd/vulkan/winsys/null/radv_null_winsys.c mesa-20.2.6/src/amd/vulkan/winsys/null/radv_null_winsys.c --- mesa-20.2.1/src/amd/vulkan/winsys/null/radv_null_winsys.c 2020-10-14 17:19:10.309849500 +0000 +++ mesa-20.2.6/src/amd/vulkan/winsys/null/radv_null_winsys.c 2020-12-16 21:42:03.568110000 +0000 @@ -103,6 +103,7 @@ info->pci_id = gpu_info[info->family].pci_id; info->has_syncobj_wait_for_submit = true; info->max_se = 4; + info->num_se = 4; if (info->chip_class >= GFX10_3) info->max_wave64_per_simd = 16; else if (info->chip_class >= GFX10) diff -Nru mesa-20.2.1/src/compiler/Android.nir.gen.mk mesa-20.2.6/src/compiler/Android.nir.gen.mk --- mesa-20.2.1/src/compiler/Android.nir.gen.mk 2020-10-14 17:19:10.316516200 +0000 +++ mesa-20.2.6/src/compiler/Android.nir.gen.mk 2020-12-16 21:42:03.575110000 +0000 @@ -100,11 +100,11 @@ @mkdir -p $(dir $@) $(hide) $(MESA_PYTHON2) $^ $@ || ($(RM) $@; false) -$(intermediates)/spirv/vtn_gather_types.c:: $(LOCAL_PATH)/spirv/vtn_gather_types_c.py $(LOCAL_PATH)/spirv/spirv.core.grammar.json +$(intermediates)/spirv/vtn_gather_types.c: $(LOCAL_PATH)/spirv/vtn_gather_types_c.py $(LOCAL_PATH)/spirv/spirv.core.grammar.json @mkdir -p $(dir $@) $(hide) $(MESA_PYTHON2) $^ $@ || ($(RM) $@; false) -$(intermediates)/spirv/vtn_generator_ids.h:: $(LOCAL_PATH)/spirv/vtn_generator_ids_h.py $(LOCAL_PATH)/spirv/spir-v.xml 
+$(intermediates)/spirv/vtn_generator_ids.h: $(LOCAL_PATH)/spirv/vtn_generator_ids_h.py $(LOCAL_PATH)/spirv/spir-v.xml @mkdir -p $(dir $@) $(hide) $(MESA_PYTHON2) $^ $@ || ($(RM) $@; false) diff -Nru mesa-20.2.1/src/compiler/glsl/glcpp/glcpp-parse.y mesa-20.2.6/src/compiler/glsl/glcpp/glcpp-parse.y --- mesa-20.2.1/src/compiler/glsl/glcpp/glcpp-parse.y 2020-10-14 17:19:10.323183000 +0000 +++ mesa-20.2.6/src/compiler/glsl/glcpp/glcpp-parse.y 2020-12-16 21:42:03.581110000 +0000 @@ -781,7 +781,10 @@ junk: /* empty */ | pp_tokens { - glcpp_error(&@1, parser, "extra tokens at end of directive"); + if (parser->gl_ctx->Const.AllowExtraPPTokens) + glcpp_warning(&@1, parser, "extra tokens at end of directive"); + else + glcpp_error(&@1, parser, "extra tokens at end of directive"); } ; diff -Nru mesa-20.2.1/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected mesa-20.2.6/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected --- mesa-20.2.1/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected 2020-10-14 17:19:10.326516400 +0000 +++ mesa-20.2.6/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected 2020-12-16 21:42:03.584110000 +0000 @@ -1,2 +1,2 @@ -0:2(8): preprocessor error: syntax error, unexpected $end +0:2(8): preprocessor error: syntax error, unexpected end of file diff -Nru mesa-20.2.1/src/compiler/glsl/glcpp/tests/glcpp_test.py mesa-20.2.6/src/compiler/glsl/glcpp/tests/glcpp_test.py --- mesa-20.2.1/src/compiler/glsl/glcpp/tests/glcpp_test.py 2020-10-14 17:19:10.329849700 +0000 +++ mesa-20.2.6/src/compiler/glsl/glcpp/tests/glcpp_test.py 2020-12-16 21:42:03.587110000 +0000 @@ -79,6 +79,10 @@ with open(expfile, 'r') as f: expected = f.read() + # Bison 3.6 changed '$end' to 'end of file' in its error messages + # See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3181 + actual = actual.replace('$end', 'end of file') + if actual == expected: return (True, []) return (False, 
difflib.unified_diff(actual.splitlines(), expected.splitlines())) diff -Nru mesa-20.2.1/src/compiler/glsl/link_varyings.cpp mesa-20.2.6/src/compiler/glsl/link_varyings.cpp --- mesa-20.2.1/src/compiler/glsl/link_varyings.cpp 2020-10-14 17:19:10.336516400 +0000 +++ mesa-20.2.6/src/compiler/glsl/link_varyings.cpp 2020-12-16 21:42:03.593110000 +0000 @@ -875,10 +875,40 @@ /* Check for input vars with unmatched output vars in prev stage * taking into account that interface blocks could have a matching * output but with different name, so we ignore them. + * + * Section 4.3.4 (Inputs) of the GLSL 4.10 specifications say: + * + * "Only the input variables that are actually read need to be + * written by the previous stage; it is allowed to have + * superfluous declarations of input variables." + * + * However it's not defined anywhere as to how we should handle + * inputs that are not written in the previous stage and it's not + * clear what "actually read" means. + * + * The GLSL 4.20 spec however is much clearer: + * + * "Only the input variables that are statically read need to + * be written by the previous stage; it is allowed to have + * superfluous declarations of input variables." + * + * It also has a table that states it is an error to statically + * read an input that is not defined in the previous stage. While + * it is not an error to not statically write to the output (it + * just needs to be defined to not be an error). + * + * The text in the GLSL 4.20 spec was an attempt to clarify the + * previous spec iterations. However given the difference in spec + * and that some applications seem to depend on not erroring when + * the input is not actually read in control flow we only apply + * this rule to GLSL 4.00 and higher. GLSL 4.00 was chosen as + * a 3.30 shader is the highest version of GLSL we have seen in + * the wild dependant on the less strict interpretation. 
*/ assert(!input->data.assigned); if (input->data.used && !input->get_interface_type() && - !input->data.explicit_location) + !input->data.explicit_location && + (prog->data->Version >= (prog->IsES ? 0 : 400))) linker_error(prog, "%s shader input `%s' " "has no matching output in the previous stage\n", diff -Nru mesa-20.2.1/src/compiler/nir/meson.build mesa-20.2.6/src/compiler/nir/meson.build --- mesa-20.2.1/src/compiler/nir/meson.build 2020-10-14 17:19:10.346516400 +0000 +++ mesa-20.2.6/src/compiler/nir/meson.build 2020-12-16 21:42:03.604110000 +0000 @@ -396,4 +396,17 @@ ), suite : ['compiler', 'nir'], ) + + test( + 'nir_lower_returns', + executable( + 'nir_lower_returns_tests', + files('tests/lower_returns_tests.cpp'), + cpp_args : [cpp_msvc_compat_args], + gnu_symbol_visibility : 'hidden', + include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux], + dependencies : [dep_thread, idep_gtest, idep_nir, idep_mesautil], + ), + suite : ['compiler', 'nir'], + ) endif diff -Nru mesa-20.2.1/src/compiler/nir/nir.c mesa-20.2.6/src/compiler/nir/nir.c --- mesa-20.2.1/src/compiler/nir/nir.c 2020-10-14 17:19:10.346516400 +0000 +++ mesa-20.2.6/src/compiler/nir/nir.c 2020-12-16 21:42:03.604110000 +0000 @@ -271,6 +271,20 @@ dest->saturate = src->saturate; } +bool +nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn) +{ + static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS); + + const nir_alu_src *src = &alu->src[srcn]; + unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn); + + return src->src.is_ssa && (src->src.ssa->num_components == num_components) && + !src->abs && !src->negate && + (memcmp(src->swizzle, trivial_swizzle, num_components) == 0); +} + static void cf_init(nir_cf_node *node, nir_cf_node_type type) diff -Nru mesa-20.2.1/src/compiler/nir/nir_control_flow.c 
mesa-20.2.6/src/compiler/nir/nir_control_flow.c --- mesa-20.2.1/src/compiler/nir/nir_control_flow.c 2020-10-14 17:19:10.349849700 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_control_flow.c 2020-12-16 21:42:03.607110000 +0000 @@ -226,8 +226,8 @@ } } -static void -insert_phi_undef(nir_block *block, nir_block *pred) +void +nir_insert_phi_undef(nir_block *block, nir_block *pred) { nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); nir_foreach_instr(instr, block) { @@ -298,7 +298,7 @@ nir_block *head_block = nir_loop_first_block(loop); link_blocks(block, head_block, NULL); - insert_phi_undef(head_block, block); + nir_insert_phi_undef(head_block, block); } else { nir_function_impl *impl = nir_cf_node_as_function(parent); link_blocks(block, impl->end_block, NULL); @@ -318,7 +318,7 @@ nir_block *first_block = nir_loop_first_block(next_loop); link_blocks(block, first_block, NULL); - insert_phi_undef(first_block, block); + nir_insert_phi_undef(first_block, block); } } } diff -Nru mesa-20.2.1/src/compiler/nir/nir_control_flow.h mesa-20.2.6/src/compiler/nir/nir_control_flow.h --- mesa-20.2.1/src/compiler/nir/nir_control_flow.h 2020-10-14 17:19:10.349849700 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_control_flow.h 2020-12-16 21:42:03.607110000 +0000 @@ -171,6 +171,9 @@ nir_cf_delete(&list); } +/** inserts undef phi sources from predcessor into phis of the block */ +void nir_insert_phi_undef(nir_block *block, nir_block *pred); + #ifdef __cplusplus } #endif diff -Nru mesa-20.2.1/src/compiler/nir/nir.h mesa-20.2.6/src/compiler/nir/nir.h --- mesa-20.2.1/src/compiler/nir/nir.h 2020-10-14 17:19:10.346516400 +0000 +++ mesa-20.2.6/src/compiler/nir/nir.h 2020-12-16 21:42:03.606110000 +0000 @@ -1406,6 +1406,8 @@ const nir_alu_instr *alu2, unsigned src1, unsigned src2); +bool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn); + typedef enum { nir_deref_type_var, nir_deref_type_array, diff -Nru mesa-20.2.1/src/compiler/nir/nir_intrinsics.py 
mesa-20.2.6/src/compiler/nir/nir_intrinsics.py --- mesa-20.2.1/src/compiler/nir/nir_intrinsics.py 2020-10-14 17:19:10.349849700 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_intrinsics.py 2020-12-16 21:42:03.608110000 +0000 @@ -873,7 +873,7 @@ # Loads the sampler paramaters # src[] = { sampler_index } -load("sampler_lod_parameters_pan", [1], [CAN_ELIMINATE, CAN_REORDER]) +load("sampler_lod_parameters_pan", [1], flags=[CAN_ELIMINATE, CAN_REORDER]) # R600 specific instrincs # diff -Nru mesa-20.2.1/src/compiler/nir/nir_lower_returns.c mesa-20.2.6/src/compiler/nir/nir_lower_returns.c --- mesa-20.2.1/src/compiler/nir/nir_lower_returns.c 2020-10-14 17:19:10.353183300 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_lower_returns.c 2020-12-16 21:42:03.619110000 +0000 @@ -63,9 +63,10 @@ /* If we're inside of a loop, then all we need to do is insert a * conditional break. */ - nir_jump_instr *brk = - nir_jump_instr_create(state->builder.shader, nir_jump_break); - nir_instr_insert(nir_before_cf_list(&if_stmt->then_list), &brk->instr); + nir_jump(b, nir_jump_break); + + nir_block *block = nir_cursor_current_block(b->cursor); + nir_insert_phi_undef(block->successors[0], block); } else { /* Otherwise, we need to actually move everything into the else case * of the if statement. @@ -207,6 +208,8 @@ if (state->loop) { /* We're in a loop; we need to break out of it. 
*/ nir_jump(b, nir_jump_break); + + nir_insert_phi_undef(block->successors[0], block); } else { /* Not in a loop; we'll deal with predicating later*/ assert(nir_cf_node_next(&block->cf_node) == NULL); diff -Nru mesa-20.2.1/src/compiler/nir/nir_opt_copy_prop_vars.c mesa-20.2.6/src/compiler/nir/nir_opt_copy_prop_vars.c --- mesa-20.2.1/src/compiler/nir/nir_opt_copy_prop_vars.c 2020-10-14 17:19:10.356516600 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_opt_copy_prop_vars.c 2020-12-16 21:42:03.622110100 +0000 @@ -185,15 +185,19 @@ break; case nir_intrinsic_deref_atomic_add: + case nir_intrinsic_deref_atomic_fadd: case nir_intrinsic_deref_atomic_imin: case nir_intrinsic_deref_atomic_umin: + case nir_intrinsic_deref_atomic_fmin: case nir_intrinsic_deref_atomic_imax: case nir_intrinsic_deref_atomic_umax: + case nir_intrinsic_deref_atomic_fmax: case nir_intrinsic_deref_atomic_and: case nir_intrinsic_deref_atomic_or: case nir_intrinsic_deref_atomic_xor: case nir_intrinsic_deref_atomic_exchange: case nir_intrinsic_deref_atomic_comp_swap: + case nir_intrinsic_deref_atomic_fcomp_swap: case nir_intrinsic_store_deref: case nir_intrinsic_copy_deref: { /* Destination in all of store_deref, copy_deref and the atomics is src[0]. 
*/ @@ -1057,15 +1061,19 @@ } case nir_intrinsic_deref_atomic_add: + case nir_intrinsic_deref_atomic_fadd: case nir_intrinsic_deref_atomic_imin: case nir_intrinsic_deref_atomic_umin: + case nir_intrinsic_deref_atomic_fmin: case nir_intrinsic_deref_atomic_imax: case nir_intrinsic_deref_atomic_umax: + case nir_intrinsic_deref_atomic_fmax: case nir_intrinsic_deref_atomic_and: case nir_intrinsic_deref_atomic_or: case nir_intrinsic_deref_atomic_xor: case nir_intrinsic_deref_atomic_exchange: case nir_intrinsic_deref_atomic_comp_swap: + case nir_intrinsic_deref_atomic_fcomp_swap: if (debug) dump_instr(instr); nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]); diff -Nru mesa-20.2.1/src/compiler/nir/nir_opt_if.c mesa-20.2.6/src/compiler/nir/nir_opt_if.c --- mesa-20.2.1/src/compiler/nir/nir_opt_if.c 2020-10-14 17:19:10.356516600 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_opt_if.c 2020-12-16 21:42:03.623110000 +0000 @@ -644,14 +644,13 @@ bool match = true; for (unsigned i = 0; i < 3; i++) { - /* FINISHME: The abs and negate cases could be handled by adding - * move instructions at the bottom of the continue block and more - * phi nodes in the header_block. + /* FINISHME: The abs, negate and swizzled cases could be handled by + * adding move instructions at the bottom of the continue block and + * more phi nodes in the header_block. 
*/ - if (!bcsel->src[i].src.is_ssa || + if (!nir_alu_src_is_trivial_ssa(bcsel, i) || bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi || - bcsel->src[i].src.ssa->parent_instr->block != header_block || - bcsel->src[i].negate || bcsel->src[i].abs) { + bcsel->src[i].src.ssa->parent_instr->block != header_block) { match = false; break; } diff -Nru mesa-20.2.1/src/compiler/nir/nir_opt_intrinsics.c mesa-20.2.6/src/compiler/nir/nir_opt_intrinsics.c --- mesa-20.2.1/src/compiler/nir/nir_opt_intrinsics.c 2020-10-14 17:19:10.356516600 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_opt_intrinsics.c 2020-12-16 21:42:03.623110000 +0000 @@ -86,6 +86,7 @@ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(new_expr)); nir_instr_remove(&alu->instr); + progress = true; continue; } } diff -Nru mesa-20.2.1/src/compiler/nir/nir_opt_large_constants.c mesa-20.2.6/src/compiler/nir/nir_opt_large_constants.c --- mesa-20.2.1/src/compiler/nir/nir_opt_large_constants.c 2020-10-14 17:19:10.356516600 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_opt_large_constants.c 2020-12-16 21:42:03.623110000 +0000 @@ -50,7 +50,9 @@ uint32_t a_size = a->constant_data_size; uint32_t b_size = b->constant_data_size; - if (a_size < b_size) { + if (a->is_constant != b->is_constant) { + return (int)a->is_constant - (int)b->is_constant; + } else if (a_size < b_size) { return -1; } else if (a_size > b_size) { return 1; diff -Nru mesa-20.2.1/src/compiler/nir/nir_opt_load_store_vectorize.c mesa-20.2.6/src/compiler/nir/nir_opt_load_store_vectorize.c --- mesa-20.2.1/src/compiler/nir/nir_opt_load_store_vectorize.c 2020-10-14 17:19:10.356516600 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_opt_load_store_vectorize.c 2020-12-16 21:42:03.623110000 +0000 @@ -1219,6 +1219,11 @@ case nir_intrinsic_discard: modes = nir_var_all; break; + case nir_intrinsic_demote_if: + case nir_intrinsic_demote: + acquire = false; + modes = nir_var_all; + break; case nir_intrinsic_memory_barrier_buffer: modes = nir_var_mem_ssbo | 
nir_var_mem_global; break; diff -Nru mesa-20.2.1/src/compiler/nir/nir_range_analysis.c mesa-20.2.6/src/compiler/nir/nir_range_analysis.c --- mesa-20.2.1/src/compiler/nir/nir_range_analysis.c 2020-10-14 17:19:10.359850000 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_range_analysis.c 2020-12-16 21:42:03.624110000 +0000 @@ -1099,6 +1099,7 @@ return a * b; } +/* recursively gather at most "buf_size" phi/bcsel sources */ static unsigned search_phi_bcsel(nir_ssa_scalar scalar, nir_ssa_scalar *buf, unsigned buf_size, struct set *visited) { @@ -1109,15 +1110,17 @@ if (scalar.def->parent_instr->type == nir_instr_type_phi) { nir_phi_instr *phi = nir_instr_as_phi(scalar.def->parent_instr); unsigned num_sources_left = exec_list_length(&phi->srcs); - unsigned total_added = 0; - nir_foreach_phi_src(src, phi) { - unsigned added = search_phi_bcsel( - (nir_ssa_scalar){src->src.ssa, 0}, buf + total_added, buf_size - num_sources_left, visited); - buf_size -= added; - total_added += added; - num_sources_left--; + if (buf_size >= num_sources_left) { + unsigned total_added = 0; + nir_foreach_phi_src(src, phi) { + unsigned added = search_phi_bcsel( + (nir_ssa_scalar){src->src.ssa, 0}, buf + total_added, buf_size - num_sources_left, visited); + buf_size -= added; + total_added += added; + num_sources_left--; + } + return total_added; } - return total_added; } if (nir_ssa_scalar_is_alu(scalar)) { diff -Nru mesa-20.2.1/src/compiler/nir/nir_schedule.c mesa-20.2.6/src/compiler/nir/nir_schedule.c --- mesa-20.2.1/src/compiler/nir/nir_schedule.c 2020-10-14 17:19:10.359850000 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_schedule.c 2020-12-16 21:42:03.625110100 +0000 @@ -355,6 +355,8 @@ case nir_intrinsic_discard: case nir_intrinsic_discard_if: + case nir_intrinsic_demote: + case nir_intrinsic_demote_if: /* We are adding two dependencies: * * * A individual one that we could use to add a read_dep while handling diff -Nru mesa-20.2.1/src/compiler/nir/nir_split_vars.c 
mesa-20.2.6/src/compiler/nir/nir_split_vars.c --- mesa-20.2.1/src/compiler/nir/nir_split_vars.c 2020-10-14 17:19:10.359850000 +0000 +++ mesa-20.2.6/src/compiler/nir/nir_split_vars.c 2020-12-16 21:42:03.626110000 +0000 @@ -232,6 +232,14 @@ continue; nir_variable *base_var = nir_deref_instr_get_variable(deref); + /* If we can't chase back to the variable, then we're a complex use. + * This should have been detected by get_complex_used_vars() and the + * variable should not have been split. However, we have no way of + * knowing that here, so we just have to trust it. + */ + if (base_var == NULL) + continue; + struct hash_entry *entry = _mesa_hash_table_search(var_field_map, base_var); if (!entry) diff -Nru mesa-20.2.1/src/compiler/nir/tests/lower_returns_tests.cpp mesa-20.2.6/src/compiler/nir/tests/lower_returns_tests.cpp --- mesa-20.2.1/src/compiler/nir/tests/lower_returns_tests.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-20.2.6/src/compiler/nir/tests/lower_returns_tests.cpp 2020-12-16 21:42:03.627110000 +0000 @@ -0,0 +1,213 @@ +/* + * Copyright © 2020 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include +#include "nir.h" +#include "nir_builder.h" + +class nir_opt_lower_returns_test : public ::testing::Test { +protected: + nir_opt_lower_returns_test(); + ~nir_opt_lower_returns_test(); + + nir_builder bld; + + nir_ssa_def *in_def; +}; + +nir_opt_lower_returns_test::nir_opt_lower_returns_test() +{ + glsl_type_singleton_init_or_ref(); + + static const nir_shader_compiler_options options = { }; + nir_builder_init_simple_shader(&bld, NULL, MESA_SHADER_VERTEX, &options); + + nir_variable *var = nir_variable_create(bld.shader, nir_var_shader_in, glsl_int_type(), "in"); + in_def = nir_load_var(&bld, var); +} + +nir_opt_lower_returns_test::~nir_opt_lower_returns_test() +{ + ralloc_free(bld.shader); + glsl_type_singleton_decref(); +} + +nir_phi_instr *create_one_source_phi(nir_shader *shader, nir_block *pred, + nir_ssa_def *def) +{ + nir_phi_instr *phi = nir_phi_instr_create(shader); + + nir_phi_src *phi_src; + phi_src = ralloc(phi, nir_phi_src); + phi_src->pred = pred; + phi_src->src = nir_src_for_ssa(def); + exec_list_push_tail(&phi->srcs, &phi_src->node); + + nir_ssa_dest_init(&phi->instr, &phi->dest, + def->num_components, def->bit_size, NULL); + + return phi; +} + +TEST_F(nir_opt_lower_returns_test, phis_after_loop) +{ + /* Test that after lowering of "return" the phis in block_5 + * have two sources, because block_2 will have block_5 + * as a successor. 
+ * + * block block_0: + * loop { + * block block_1: + * if ssa_2 { + * block block_2: + * return + * // succs: block_6 + * } else { + * block block_3: + * break; + * // succs: block_5 + * } + * block block_4: + * } + * block block_5: + * // preds: block_3 + * vec1 32 ssa_4 = phi block_3: ssa_1 + * vec1 32 ssa_5 = phi block_3: ssa_1 + * // succs: block_6 + * block block_6: + */ + + nir_loop *loop = nir_push_loop(&bld); + + nir_ssa_def *one = nir_imm_int(&bld, 1); + + nir_ssa_def *cmp_result = nir_ieq(&bld, in_def, one); + nir_if *nif = nir_push_if(&bld, cmp_result); + + nir_jump(&bld, nir_jump_return); + + nir_push_else(&bld, NULL); + + nir_jump(&bld, nir_jump_break); + + nir_pop_if(&bld, NULL); + + nir_block *else_block = nir_if_last_else_block(nif); + + nir_pop_loop(&bld, loop); + + bld.cursor = nir_after_cf_node_and_phis(&loop->cf_node); + + nir_phi_instr *const phi_1 = + create_one_source_phi(bld.shader, else_block, one); + nir_builder_instr_insert(&bld, &phi_1->instr); + + nir_phi_instr *const phi_2 = + create_one_source_phi(bld.shader, else_block, one); + nir_builder_instr_insert(&bld, &phi_2->instr); + + ASSERT_TRUE(nir_lower_returns(bld.shader)); + EXPECT_EQ(phi_1->srcs.length(), 2); + EXPECT_EQ(phi_2->srcs.length(), 2); + + nir_validate_shader(bld.shader, NULL); +} + +TEST_F(nir_opt_lower_returns_test, phis_after_outer_loop) +{ + /* Test that after lowering of "return" the phis in block_7 + * have two sources, because block_6 will have a conditional break + * inserted, which will add a new predcessor to block_7. 
+ * + * block block_0: + * loop { + * block block_1: + * loop { + * block block_2: + * if ssa_2 { + * block block_3: + * return + * // succs: block_8 + * } else { + * block block_4: + * break; + * // succs: block_6 + * } + * block block_5: + * } + * block block_6: + * break; + * // succs: block_7 + * } + * block block_7: + * // preds: block_6 + * vec1 32 ssa_4 = phi block_6: ssa_1 + * vec1 32 ssa_5 = phi block_6: ssa_1 + * // succs: block_8 + * block block_8: + */ + + nir_loop *loop_outer = nir_push_loop(&bld); + + bld.cursor = nir_after_cf_list(&loop_outer->body); + + nir_loop *loop_inner = nir_push_loop(&bld); + + bld.cursor = nir_after_cf_list(&loop_inner->body); + + nir_ssa_def *one = nir_imm_int(&bld, 1); + + nir_ssa_def *cmp_result = nir_ieq(&bld, in_def, one); + nir_push_if(&bld, cmp_result); + + nir_jump(&bld, nir_jump_return); + + nir_push_else(&bld, NULL); + + nir_jump(&bld, nir_jump_break); + + nir_pop_if(&bld, NULL); + + nir_pop_loop(&bld, loop_inner); + + bld.cursor = nir_after_cf_node_and_phis(&loop_inner->cf_node); + + nir_jump(&bld, nir_jump_break); + + nir_pop_loop(&bld, loop_outer); + + bld.cursor = nir_after_cf_node_and_phis(&loop_outer->cf_node); + + nir_phi_instr *const phi_1 = + create_one_source_phi(bld.shader, nir_loop_last_block(loop_outer), one); + nir_builder_instr_insert(&bld, &phi_1->instr); + + nir_phi_instr *const phi_2 = + create_one_source_phi(bld.shader, nir_loop_last_block(loop_outer), one); + nir_builder_instr_insert(&bld, &phi_2->instr); + + ASSERT_TRUE(nir_lower_returns(bld.shader)); + EXPECT_EQ(phi_1->srcs.length(), 2); + EXPECT_EQ(phi_2->srcs.length(), 2); + + nir_validate_shader(bld.shader, NULL); +} diff -Nru mesa-20.2.1/src/compiler/spirv/vtn_glsl450.c mesa-20.2.6/src/compiler/spirv/vtn_glsl450.c --- mesa-20.2.1/src/compiler/spirv/vtn_glsl450.c 2020-10-14 17:19:10.363183300 +0000 +++ mesa-20.2.6/src/compiler/spirv/vtn_glsl450.c 2020-12-16 21:42:03.631110200 +0000 @@ -339,8 +339,11 @@ nir_ssa_def *sign = nir_fsign(nb, 
src[0]); nir_ssa_def *abs = nir_fabs(nb, src[0]); dest->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); - nir_store_deref(nb, vtn_nir_deref(b, w[6]), - nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf); + + struct vtn_pointer *i_ptr = vtn_value(b, w[6], vtn_value_type_pointer)->pointer; + struct vtn_ssa_value *whole = vtn_create_ssa_value(b, i_ptr->type->type); + whole->def = nir_fmul(nb, sign, nir_ffloor(nb, abs)); + vtn_variable_store(b, whole, i_ptr); break; } @@ -526,9 +529,12 @@ break; case GLSLstd450Frexp: { - nir_ssa_def *exponent = nir_frexp_exp(nb, src[0]); dest->def = nir_frexp_sig(nb, src[0]); - nir_store_deref(nb, vtn_nir_deref(b, w[6]), exponent, 0xf); + + struct vtn_pointer *i_ptr = vtn_value(b, w[6], vtn_value_type_pointer)->pointer; + struct vtn_ssa_value *exp = vtn_create_ssa_value(b, i_ptr->type->type); + exp->def = nir_frexp_exp(nb, src[0]); + vtn_variable_store(b, exp, i_ptr); break; } diff -Nru mesa-20.2.1/src/egl/drivers/dri2/egl_dri2.c mesa-20.2.6/src/egl/drivers/dri2/egl_dri2.c --- mesa-20.2.1/src/egl/drivers/dri2/egl_dri2.c 2020-10-14 17:19:10.366516600 +0000 +++ mesa-20.2.6/src/egl/drivers/dri2/egl_dri2.c 2020-12-16 21:42:03.633110000 +0000 @@ -685,7 +685,10 @@ (void) screen; + mtx_lock(&disp->Mutex); img = _eglLookupImage(image, disp); + mtx_unlock(&disp->Mutex); + if (img == NULL) { _eglError(EGL_BAD_PARAMETER, "dri2_lookup_egl_image"); return NULL; diff -Nru mesa-20.2.1/src/etnaviv/drm/etnaviv_bo.c mesa-20.2.6/src/etnaviv/drm/etnaviv_bo.c --- mesa-20.2.1/src/etnaviv/drm/etnaviv_bo.c 2020-10-14 17:19:10.369850000 +0000 +++ mesa-20.2.6/src/etnaviv/drm/etnaviv_bo.c 2020-12-16 21:42:03.637110000 +0000 @@ -257,11 +257,15 @@ struct etna_device *dev = bo->dev; - if (!p_atomic_dec_zero(&bo->refcnt)) - return; - pthread_mutex_lock(&etna_drm_table_lock); + /* Must test under table lock to avoid racing with the from_dmabuf/name + * paths, which rely on the BO refcount to be stable over the lookup, so + * they can grab a reference when the BO is found 
in the hash. + */ + if (!p_atomic_dec_zero(&bo->refcnt)) + goto out; + if (bo->reuse && (etna_bo_cache_free(&dev->bo_cache, bo) == 0)) goto out; diff -Nru mesa-20.2.1/src/etnaviv/drm/etnaviv_bo_cache.c mesa-20.2.6/src/etnaviv/drm/etnaviv_bo_cache.c --- mesa-20.2.1/src/etnaviv/drm/etnaviv_bo_cache.c 2020-10-14 17:19:10.369850000 +0000 +++ mesa-20.2.6/src/etnaviv/drm/etnaviv_bo_cache.c 2020-12-16 21:42:03.637110000 +0000 @@ -28,7 +28,6 @@ #include "etnaviv_drmif.h" void _etna_bo_del(struct etna_bo *bo); -extern pthread_mutex_t etna_drm_table_lock; static void add_bucket(struct etna_bo_cache *cache, int size) { diff -Nru mesa-20.2.1/src/etnaviv/drm/etnaviv_device.c mesa-20.2.6/src/etnaviv/drm/etnaviv_device.c --- mesa-20.2.1/src/etnaviv/drm/etnaviv_device.c 2020-10-14 17:19:10.369850000 +0000 +++ mesa-20.2.6/src/etnaviv/drm/etnaviv_device.c 2020-12-16 21:42:03.637110000 +0000 @@ -30,8 +30,6 @@ #include "etnaviv_priv.h" #include "etnaviv_drmif.h" -static pthread_mutex_t etna_drm_table_lock = PTHREAD_MUTEX_INITIALIZER; - struct etna_device *etna_device_new(int fd) { struct etna_device *dev = calloc(sizeof(*dev), 1); diff -Nru mesa-20.2.1/src/etnaviv/drm/etnaviv_priv.h mesa-20.2.6/src/etnaviv/drm/etnaviv_priv.h --- mesa-20.2.1/src/etnaviv/drm/etnaviv_priv.h 2020-10-14 17:19:10.369850000 +0000 +++ mesa-20.2.6/src/etnaviv/drm/etnaviv_priv.h 2020-12-16 21:42:03.638110200 +0000 @@ -50,6 +50,8 @@ #include "etnaviv_drmif.h" #include "drm-uapi/etnaviv_drm.h" +extern pthread_mutex_t etna_drm_table_lock; + struct etna_bo_bucket { uint32_t size; struct list_head list; diff -Nru mesa-20.2.1/src/freedreno/computerator/meson.build mesa-20.2.6/src/freedreno/computerator/meson.build --- mesa-20.2.1/src/freedreno/computerator/meson.build 2020-10-14 17:19:10.379850100 +0000 +++ mesa-20.2.6/src/freedreno/computerator/meson.build 2020-12-16 21:42:03.648110200 +0000 @@ -23,6 +23,7 @@ 'ir3_asm.c', 'main.c', freedreno_xml_header_files, + ir3_parser[1], ] computerator = executable( diff -Nru 
mesa-20.2.1/src/freedreno/fdl/fd6_layout.c mesa-20.2.6/src/freedreno/fdl/fd6_layout.c --- mesa-20.2.1/src/freedreno/fdl/fd6_layout.c 2020-10-14 17:19:10.383183500 +0000 +++ mesa-20.2.6/src/freedreno/fdl/fd6_layout.c 2020-12-16 21:42:03.650110000 +0000 @@ -208,7 +208,7 @@ * may not be. note this only matters if last level is linear */ if (level == mip_levels - 1) - height = align(nblocksy, 4); + nblocksy = align(nblocksy, 4); slice->offset = offset + layout->size; diff -Nru mesa-20.2.1/src/freedreno/ir3/disasm-a3xx.c mesa-20.2.6/src/freedreno/ir3/disasm-a3xx.c --- mesa-20.2.1/src/freedreno/ir3/disasm-a3xx.c 2020-10-14 17:19:10.383183500 +0000 +++ mesa-20.2.6/src/freedreno/ir3/disasm-a3xx.c 2020-12-16 21:42:03.651110200 +0000 @@ -266,7 +266,7 @@ * assigned to shader: */ fullreg = (fullreg + 3) / 4; - halfreg = (halfreg + 3) / 4; + halfreg = ctx->regs.used.mergedregs ? 0 : (halfreg + 3) / 4; // Note this count of instructions includes rptN, which matches // up to how mesa prints this: diff -Nru mesa-20.2.1/src/freedreno/ir3/ir3.c mesa-20.2.6/src/freedreno/ir3/ir3.c --- mesa-20.2.1/src/freedreno/ir3/ir3.c 2020-10-14 17:19:10.383183500 +0000 +++ mesa-20.2.6/src/freedreno/ir3/ir3.c 2020-12-16 21:42:03.651110200 +0000 @@ -986,7 +986,7 @@ return ptr; fail: - free(ptr); + ralloc_free(ptr); return NULL; } diff -Nru mesa-20.2.1/src/freedreno/ir3/ir3_disk_cache.c mesa-20.2.6/src/freedreno/ir3/ir3_disk_cache.c --- mesa-20.2.1/src/freedreno/ir3/ir3_disk_cache.c 2020-10-14 17:19:10.386516800 +0000 +++ mesa-20.2.6/src/freedreno/ir3/ir3_disk_cache.c 2020-12-16 21:42:03.653110000 +0000 @@ -126,7 +126,7 @@ * pointers need special handling: */ - v->bin = malloc(4 * v->info.sizedwords); + v->bin = rzalloc_size(v, 4 * v->info.sizedwords); blob_copy_bytes(blob, v->bin, 4 * v->info.sizedwords); if (!v->binning_pass) { diff -Nru mesa-20.2.1/src/gallium/auxiliary/Android.mk mesa-20.2.6/src/gallium/auxiliary/Android.mk --- mesa-20.2.1/src/gallium/auxiliary/Android.mk 2020-10-14 
17:19:10.399850100 +0000 +++ mesa-20.2.6/src/gallium/auxiliary/Android.mk 2020-12-16 21:42:03.667110200 +0000 @@ -36,7 +36,9 @@ $(VL_STUB_SOURCES) ifeq ($(USE_LIBBACKTRACE),true) - LOCAL_SRC_FILES += util/u_debug_stack_android.cpp + LOCAL_CFLAGS += -DHAVE_ANDROID_PLATFORM + LOCAL_SHARED_LIBRARIES += libbacktrace + LOCAL_SRC_FILES += ../../util/u_debug_stack_android.cpp endif LOCAL_C_INCLUDES := \ diff -Nru mesa-20.2.1/src/gallium/auxiliary/cso_cache/cso_context.c mesa-20.2.6/src/gallium/auxiliary/cso_cache/cso_context.c --- mesa-20.2.1/src/gallium/auxiliary/cso_cache/cso_context.c 2020-10-14 17:19:10.399850100 +0000 +++ mesa-20.2.6/src/gallium/auxiliary/cso_cache/cso_context.c 2020-12-16 21:42:03.667110200 +0000 @@ -371,22 +371,53 @@ { static struct pipe_sampler_view *views[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { NULL }; + static struct pipe_shader_buffer ssbos[PIPE_MAX_SHADER_BUFFERS] = { 0 }; static void *zeros[PIPE_MAX_SAMPLERS] = { NULL }; struct pipe_screen *scr = ctx->pipe->screen; enum pipe_shader_type sh; for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) { + switch (sh) { + case PIPE_SHADER_GEOMETRY: + if (!ctx->has_geometry_shader) + continue; + break; + case PIPE_SHADER_TESS_CTRL: + case PIPE_SHADER_TESS_EVAL: + if (!ctx->has_tessellation) + continue; + break; + case PIPE_SHADER_COMPUTE: + if (!ctx->has_compute_shader) + continue; + break; + default: + break; + } + int maxsam = scr->get_shader_param(scr, sh, PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS); int maxview = scr->get_shader_param(scr, sh, PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS); + int maxssbo = scr->get_shader_param(scr, sh, + PIPE_SHADER_CAP_MAX_SHADER_BUFFERS); + int maxcb = scr->get_shader_param(scr, sh, + PIPE_SHADER_CAP_MAX_CONST_BUFFERS); assert(maxsam <= PIPE_MAX_SAMPLERS); assert(maxview <= PIPE_MAX_SHADER_SAMPLER_VIEWS); + assert(maxssbo <= PIPE_MAX_SHADER_BUFFERS); + assert(maxcb <= PIPE_MAX_CONSTANT_BUFFERS); if (maxsam > 0) { ctx->pipe->bind_sampler_states(ctx->pipe, sh, 0, maxsam, zeros); } if (maxview > 0) 
{ ctx->pipe->set_sampler_views(ctx->pipe, sh, 0, maxview, views); } + if (maxssbo > 0) { + ctx->pipe->set_shader_buffers(ctx->pipe, sh, 0, maxssbo, ssbos, 0); + } + for (int i = 0; i < maxcb; i++) { + ctx->pipe->set_constant_buffer(ctx->pipe, sh, i, NULL); + } } } @@ -397,17 +428,13 @@ ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, NULL); if (ctx->has_geometry_shader) { ctx->pipe->bind_gs_state(ctx->pipe, NULL); - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_GEOMETRY, 0, NULL); } if (ctx->has_tessellation) { ctx->pipe->bind_tcs_state(ctx->pipe, NULL); - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_CTRL, 0, NULL); ctx->pipe->bind_tes_state(ctx->pipe, NULL); - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_EVAL, 0, NULL); } if (ctx->has_compute_shader) { ctx->pipe->bind_compute_state(ctx->pipe, NULL); - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_COMPUTE, 0, NULL); } ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL ); diff -Nru mesa-20.2.1/src/gallium/auxiliary/draw/draw_tess.c mesa-20.2.6/src/gallium/auxiliary/draw/draw_tess.c --- mesa-20.2.1/src/gallium/auxiliary/draw/draw_tess.c 2020-10-14 17:19:10.403183500 +0000 +++ mesa-20.2.6/src/gallium/auxiliary/draw/draw_tess.c 2020-12-16 21:42:03.672110000 +0000 @@ -350,9 +350,6 @@ shader->input_vertex_stride = input_stride; shader->input_info = input_info; - if (shader->draw->collect_statistics) { - shader->draw->statistics.ds_invocations += input_prim->primitive_count; - } #ifdef LLVM_AVAILABLE struct pipe_tessellation_factors factors; struct pipe_tessellator_data data = { 0 }; @@ -394,6 +391,10 @@ output += vert_start * vertex_size; llvm_tes_run(shader, i, num_input_vertices_per_patch, &data, &factors, (struct vertex_header *)output); + if (shader->draw->collect_statistics) { + shader->draw->statistics.ds_invocations += data.num_domain_points; + } + uint32_t prim_len = u_prim_vertex_count(output_prims->prim)->min; output_prims->primitive_count += 
data.num_indices / prim_len; output_prims->primitive_lengths = REALLOC(output_prims->primitive_lengths, prim_start * sizeof(uint32_t), diff -Nru mesa-20.2.1/src/gallium/auxiliary/gallivm/lp_bld_limits.h mesa-20.2.6/src/gallium/auxiliary/gallivm/lp_bld_limits.h --- mesa-20.2.1/src/gallium/auxiliary/gallivm/lp_bld_limits.h 2020-10-14 17:19:10.413183700 +0000 +++ mesa-20.2.6/src/gallium/auxiliary/gallivm/lp_bld_limits.h 2020-12-16 21:42:03.682110000 +0000 @@ -110,7 +110,7 @@ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: return LP_MAX_TGSI_CONST_BUFFER_SIZE; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return PIPE_MAX_CONSTANT_BUFFERS; + return LP_MAX_TGSI_CONST_BUFFERS; case PIPE_SHADER_CAP_MAX_TEMPS: return LP_MAX_TGSI_TEMPS; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: diff -Nru mesa-20.2.1/src/gallium/auxiliary/gallivm/lp_bld_nir.c mesa-20.2.6/src/gallium/auxiliary/gallivm/lp_bld_nir.c --- mesa-20.2.1/src/gallium/auxiliary/gallivm/lp_bld_nir.c 2020-10-14 17:19:10.413183700 +0000 +++ mesa-20.2.6/src/gallium/auxiliary/gallivm/lp_bld_nir.c 2020-12-16 21:42:03.683110200 +0000 @@ -555,6 +555,7 @@ case nir_op_flog2: result = lp_build_log2_safe(&bld_base->base, src[0]); break; + case nir_op_flt: case nir_op_flt32: result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src); break; @@ -1975,8 +1976,8 @@ nir_foreach_register(reg, &func->impl->registers) { LLVMTypeRef type = get_register_type(bld_base, reg); - LLVMValueRef reg_alloc = lp_build_alloca_undef(bld_base->base.gallivm, - type, "reg"); + LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm, + type, "reg"); _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc); } nir_index_ssa_defs(func->impl); diff -Nru mesa-20.2.1/src/gallium/auxiliary/os/os_process.c mesa-20.2.6/src/gallium/auxiliary/os/os_process.c --- mesa-20.2.1/src/gallium/auxiliary/os/os_process.c 2020-10-14 17:19:10.419850300 +0000 +++ mesa-20.2.6/src/gallium/auxiliary/os/os_process.c 2020-12-16 21:42:03.689110300 +0000 @@ -122,7 +122,7 @@ } #elif 
defined(PIPE_OS_LINUX) int f = open("/proc/self/cmdline", O_RDONLY); - if (f) { + if (f != -1) { const int n = read(f, cmdline, size - 1); int i; assert(n < size); diff -Nru mesa-20.2.1/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h mesa-20.2.6/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h --- mesa-20.2.1/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h 2020-10-14 17:19:10.419850300 +0000 +++ mesa-20.2.6/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h 2020-12-16 21:42:03.689110300 +0000 @@ -22,6 +22,7 @@ DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false") DRI_CONF_DISABLE_ARB_GPU_SHADER5("false") DRI_CONF_FORCE_GLSL_VERSION(0) + DRI_CONF_ALLOW_EXTRA_PP_TOKENS("false") DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false") DRI_CONF_ALLOW_GLSL_120_SUBSET_IN_110("false") DRI_CONF_ALLOW_GLSL_BUILTIN_CONST_EXPRESSION("false") diff -Nru mesa-20.2.1/src/gallium/auxiliary/util/u_blitter.c mesa-20.2.6/src/gallium/auxiliary/util/u_blitter.c --- mesa-20.2.1/src/gallium/auxiliary/util/u_blitter.c 2020-10-14 17:19:10.429850300 +0000 +++ mesa-20.2.6/src/gallium/auxiliary/util/u_blitter.c 2020-12-16 21:42:03.699110300 +0000 @@ -2721,7 +2721,7 @@ { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; - struct pipe_framebuffer_state fb_state; + struct pipe_framebuffer_state fb_state = { 0 }; ctx->custom_vs = custom_vs; @@ -2747,7 +2747,6 @@ fb_state.height = dstsurf->height; fb_state.nr_cbufs = 1; fb_state.cbufs[0] = dstsurf; - fb_state.zsbuf = 0; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); diff -Nru mesa-20.2.1/src/gallium/auxiliary/vl/vl_video_buffer.h mesa-20.2.6/src/gallium/auxiliary/vl/vl_video_buffer.h --- mesa-20.2.1/src/gallium/auxiliary/vl/vl_video_buffer.h 2020-10-14 17:19:10.436517200 +0000 +++ mesa-20.2.6/src/gallium/auxiliary/vl/vl_video_buffer.h 2020-12-16 21:42:03.708110000 +0000 @@ -54,14 +54,14 @@ bool interlaced) { if (interlaced) { - *height /= 2; + 
*height = align(*height, 2) / 2; } if (plane > 0) { if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { - *width /= 2; - *height /= 2; + *width = align(*width, 2) / 2; + *height = align(*height, 2) / 2; } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) { - *width /= 2; + *width = align(*width, 2) / 2; } } } diff -Nru mesa-20.2.1/src/gallium/drivers/etnaviv/etnaviv_blt.c mesa-20.2.6/src/gallium/drivers/etnaviv/etnaviv_blt.c --- mesa-20.2.1/src/gallium/drivers/etnaviv/etnaviv_blt.c 2020-10-14 17:19:10.439850600 +0000 +++ mesa-20.2.6/src/gallium/drivers/etnaviv/etnaviv_blt.c 2020-12-16 21:42:03.709110300 +0000 @@ -229,7 +229,7 @@ if (surf->surf.ts_size) { clr.dest.use_ts = 1; clr.dest.ts_addr.bo = res->ts_bo; - clr.dest.ts_addr.offset = 0; + clr.dest.ts_addr.offset = surf->level->ts_offset; clr.dest.ts_addr.flags = ETNA_RELOC_WRITE; clr.dest.ts_clear_value[0] = new_clear_value; clr.dest.ts_clear_value[1] = new_clear_value >> 32; @@ -308,7 +308,7 @@ if (surf->surf.ts_size) { clr.dest.use_ts = 1; clr.dest.ts_addr.bo = res->ts_bo; - clr.dest.ts_addr.offset = 0; + clr.dest.ts_addr.offset = surf->level->ts_offset; clr.dest.ts_addr.flags = ETNA_RELOC_WRITE; clr.dest.ts_clear_value[0] = surf->level->clear_value; clr.dest.ts_clear_value[1] = surf->level->clear_value; diff -Nru mesa-20.2.1/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c mesa-20.2.6/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c --- mesa-20.2.1/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c 2020-10-14 17:19:10.439850600 +0000 +++ mesa-20.2.6/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c 2020-12-16 21:42:03.709110300 +0000 @@ -1117,8 +1117,6 @@ NIR_PASS_V(s, nir_lower_bool_to_int32); } - etna_optimize_loop(s); - if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS)) nir_print_shader(s, stdout); diff -Nru mesa-20.2.1/src/gallium/drivers/etnaviv/etnaviv_shader.c mesa-20.2.6/src/gallium/drivers/etnaviv/etnaviv_shader.c --- mesa-20.2.1/src/gallium/drivers/etnaviv/etnaviv_shader.c 2020-10-14 
17:19:10.443184000 +0000 +++ mesa-20.2.6/src/gallium/drivers/etnaviv/etnaviv_shader.c 2020-12-16 21:42:03.712110300 +0000 @@ -445,6 +445,7 @@ etna_destroy_shader(t); } + tgsi_free_tokens(shader->tokens); ralloc_free(shader->nir); FREE(shader); } diff -Nru mesa-20.2.1/src/gallium/drivers/etnaviv/etnaviv_texture_desc.c mesa-20.2.6/src/gallium/drivers/etnaviv/etnaviv_texture_desc.c --- mesa-20.2.1/src/gallium/drivers/etnaviv/etnaviv_texture_desc.c 2020-10-14 17:19:10.443184000 +0000 +++ mesa-20.2.6/src/gallium/drivers/etnaviv/etnaviv_texture_desc.c 2020-12-16 21:42:03.713110200 +0000 @@ -83,6 +83,8 @@ if (!cs) return NULL; + cs->base = *ss; + cs->SAMP_CTRL0 = VIVS_NTE_DESCRIPTOR_SAMP_CTRL0_UWRAP(translate_texture_wrapmode(ss->wrap_s)) | VIVS_NTE_DESCRIPTOR_SAMP_CTRL0_VWRAP(translate_texture_wrapmode(ss->wrap_t)) | @@ -290,15 +292,16 @@ if ((1 << x) & active_samplers) { struct etna_sampler_state_desc *ss = etna_sampler_state_desc(ctx->sampler[x]); struct etna_sampler_view_desc *sv = etna_sampler_view_desc(ctx->sampler_view[x]); + uint32_t SAMP_CTRL0 = ss->SAMP_CTRL0 | sv->SAMP_CTRL0; if (texture_use_int_filter(&sv->base, &ss->base, true)) - sv->SAMP_CTRL0 |= VIVS_NTE_DESCRIPTOR_SAMP_CTRL0_INT_FILTER; + SAMP_CTRL0 |= VIVS_NTE_DESCRIPTOR_SAMP_CTRL0_INT_FILTER; etna_set_state(stream, VIVS_NTE_DESCRIPTOR_TX_CTRL(x), COND(sv->ts.enable, VIVS_NTE_DESCRIPTOR_TX_CTRL_TS_ENABLE) | VIVS_NTE_DESCRIPTOR_TX_CTRL_TS_MODE(sv->ts.mode) | VIVS_NTE_DESCRIPTOR_TX_CTRL_TS_INDEX(x)); - etna_set_state(stream, VIVS_NTE_DESCRIPTOR_SAMP_CTRL0(x), ss->SAMP_CTRL0 | sv->SAMP_CTRL0); + etna_set_state(stream, VIVS_NTE_DESCRIPTOR_SAMP_CTRL0(x), SAMP_CTRL0); etna_set_state(stream, VIVS_NTE_DESCRIPTOR_SAMP_CTRL1(x), ss->SAMP_CTRL1 | sv->SAMP_CTRL1); etna_set_state(stream, VIVS_NTE_DESCRIPTOR_SAMP_LOD_MINMAX(x), ss->SAMP_LOD_MINMAX); etna_set_state(stream, VIVS_NTE_DESCRIPTOR_SAMP_LOD_BIAS(x), ss->SAMP_LOD_BIAS); diff -Nru mesa-20.2.1/src/gallium/drivers/etnaviv/etnaviv_texture_state.c 
mesa-20.2.6/src/gallium/drivers/etnaviv/etnaviv_texture_state.c --- mesa-20.2.1/src/gallium/drivers/etnaviv/etnaviv_texture_state.c 2020-10-14 17:19:10.443184000 +0000 +++ mesa-20.2.6/src/gallium/drivers/etnaviv/etnaviv_texture_state.c 2020-12-16 21:42:03.713110200 +0000 @@ -93,6 +93,8 @@ if (!cs) return NULL; + cs->base = *ss; + cs->TE_SAMPLER_CONFIG0 = VIVS_TE_SAMPLER_CONFIG0_UWRAP(translate_texture_wrapmode(ss->wrap_s)) | VIVS_TE_SAMPLER_CONFIG0_VWRAP(translate_texture_wrapmode(ss->wrap_t)) | @@ -348,11 +350,12 @@ if ((1 << x) & active_samplers) { ss = etna_sampler_state(ctx->sampler[x]); sv = etna_sampler_view(ctx->sampler_view[x]); + uint32_t TE_SAMPLER_LOG_SIZE = sv->TE_SAMPLER_LOG_SIZE; if (texture_use_int_filter(&sv->base, &ss->base, false)) - sv->TE_SAMPLER_LOG_SIZE |= VIVS_TE_SAMPLER_LOG_SIZE_INT_FILTER; + TE_SAMPLER_LOG_SIZE |= VIVS_TE_SAMPLER_LOG_SIZE_INT_FILTER; - /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), sv->TE_SAMPLER_LOG_SIZE); + /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), TE_SAMPLER_LOG_SIZE); } } } diff -Nru mesa-20.2.1/src/gallium/drivers/freedreno/a6xx/fd6_texture.c mesa-20.2.6/src/gallium/drivers/freedreno/a6xx/fd6_texture.c --- mesa-20.2.1/src/gallium/drivers/freedreno/a6xx/fd6_texture.c 2020-10-14 17:19:10.453184000 +0000 +++ mesa-20.2.6/src/gallium/drivers/freedreno/a6xx/fd6_texture.c 2020-12-16 21:42:03.725110300 +0000 @@ -450,6 +450,7 @@ if (rsc->seqno == state->key.view[i].rsc_seqno) { fd6_texture_state_destroy(entry->data); _mesa_hash_table_remove(fd6_ctx->tex_cache, entry); + break; } } } diff -Nru mesa-20.2.1/src/gallium/drivers/freedreno/freedreno_gmem.c mesa-20.2.6/src/gallium/drivers/freedreno/freedreno_gmem.c --- mesa-20.2.1/src/gallium/drivers/freedreno/freedreno_gmem.c 2020-10-14 17:19:10.453184000 +0000 +++ mesa-20.2.6/src/gallium/drivers/freedreno/freedreno_gmem.c 2020-12-16 21:42:03.727110100 +0000 @@ -521,11 +521,15 @@ struct fd_screen *screen = batch->ctx->screen; struct fd_gmem_cache *cache = &screen->gmem_cache; struct 
fd_gmem_stateobj *gmem = NULL; - struct gmem_key *key = gmem_key_init(batch, assume_zs, no_scis_opt); - uint32_t hash = gmem_key_hash(key); + /* Lock before allocating gmem_key, since that a screen-wide + * ralloc pool and ralloc itself is not thread-safe. + */ fd_screen_lock(screen); + struct gmem_key *key = gmem_key_init(batch, assume_zs, no_scis_opt); + uint32_t hash = gmem_key_hash(key); + struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(cache->ht, hash, key); if (entry) { diff -Nru mesa-20.2.1/src/gallium/drivers/freedreno/freedreno_resource.c mesa-20.2.6/src/gallium/drivers/freedreno/freedreno_resource.c --- mesa-20.2.1/src/gallium/drivers/freedreno/freedreno_resource.c 2020-10-14 17:19:10.456517200 +0000 +++ mesa-20.2.6/src/gallium/drivers/freedreno/freedreno_resource.c 2020-12-16 21:42:03.727110100 +0000 @@ -933,8 +933,12 @@ * should.) */ bool allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, count); - if (tmpl->bind & PIPE_BIND_SHARED) + if (tmpl->bind & PIPE_BIND_SHARED) { allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count); + if (!allow_ubwc) { + linear = true; + } + } allow_ubwc &= !(fd_mesa_debug & FD_DBG_NOUBWC); diff -Nru mesa-20.2.1/src/gallium/drivers/freedreno/freedreno_screen.c mesa-20.2.6/src/gallium/drivers/freedreno/freedreno_screen.c --- mesa-20.2.1/src/gallium/drivers/freedreno/freedreno_screen.c 2020-10-14 17:19:10.456517200 +0000 +++ mesa-20.2.6/src/gallium/drivers/freedreno/freedreno_screen.c 2020-12-16 21:42:03.728110300 +0000 @@ -160,6 +160,8 @@ simple_mtx_destroy(&screen->lock); + u_transfer_helper_destroy(pscreen->transfer_helper); + if (screen->compiler) ir3_compiler_destroy(screen->compiler); diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_blit.c mesa-20.2.6/src/gallium/drivers/iris/iris_blit.c --- mesa-20.2.1/src/gallium/drivers/iris/iris_blit.c 2020-10-14 17:19:10.459850500 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_blit.c 2020-12-16 21:42:03.734110000 +0000 @@ 
-485,15 +485,28 @@ else main_mask = PIPE_MASK_RGBA; + float src_z_step = (float)info->src.box.depth / (float)info->dst.box.depth; + + /* There is no interpolation to the pixel center during rendering, so + * add the 0.5 offset ourselves here. + */ + float depth_center_offset = 0; + if (src_res->surf.dim == ISL_SURF_DIM_3D) + depth_center_offset = 0.5 / info->dst.box.depth * info->src.box.depth; + if (info->mask & main_mask) { for (int slice = 0; slice < info->dst.box.depth; slice++) { + unsigned dst_z = info->dst.box.z + slice; + float src_z = info->src.box.z + slice * src_z_step + + depth_center_offset; + iris_batch_maybe_flush(batch, 1500); iris_batch_sync_region_start(batch); blorp_blit(&blorp_batch, - &src_surf, info->src.level, info->src.box.z + slice, + &src_surf, info->src.level, src_z, src_fmt.fmt, src_fmt.swizzle, - &dst_surf, info->dst.level, info->dst.box.z + slice, + &dst_surf, info->dst.level, dst_z, dst_fmt.fmt, dst_fmt.swizzle, src_x0, src_y0, src_x1, src_y1, dst_x0, dst_y0, dst_x1, dst_y1, diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_border_color.c mesa-20.2.6/src/gallium/drivers/iris/iris_border_color.c --- mesa-20.2.1/src/gallium/drivers/iris/iris_border_color.c 2020-10-14 17:19:10.459850500 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_border_color.c 2020-12-16 21:42:03.734110000 +0000 @@ -147,7 +147,7 @@ memcpy(pool->map + offset, color, sizeof(*color)); pool->insert_point += BC_ALIGNMENT; - _mesa_hash_table_insert_pre_hashed(pool->ht, hash, color, + _mesa_hash_table_insert_pre_hashed(pool->ht, hash, pool->map + offset, (void *) (uintptr_t) offset); return offset; } diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_bufmgr.c mesa-20.2.6/src/gallium/drivers/iris/iris_bufmgr.c --- mesa-20.2.1/src/gallium/drivers/iris/iris_bufmgr.c 2020-10-14 17:19:10.459850500 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_bufmgr.c 2020-12-16 21:42:03.734110000 +0000 @@ -1940,7 +1940,8 @@ } bufmgr = iris_bufmgr_create(devinfo, fd, bo_reuse); - 
list_addtail(&bufmgr->link, &global_bufmgr_list); + if (bufmgr) + list_addtail(&bufmgr->link, &global_bufmgr_list); unlock: mtx_unlock(&global_bufmgr_list_mutex); diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_clear.c mesa-20.2.6/src/gallium/drivers/iris/iris_clear.c --- mesa-20.2.1/src/gallium/drivers/iris/iris_clear.c 2020-10-14 17:19:10.459850500 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_clear.c 2020-12-16 21:42:03.734110000 +0000 @@ -107,10 +107,6 @@ return false; } - /* XXX: if (irb->mt->supports_fast_clear) - * see intel_miptree_create_for_dri_image() - */ - if (!iris_is_color_fast_clear_compatible(ice, res->surf.format, color)) return false; @@ -537,6 +533,7 @@ iris_resource_set_aux_state(ice, res, level, box->z, box->depth, ISL_AUX_STATE_CLEAR); ice->state.dirty |= IRIS_DIRTY_DEPTH_BUFFER; + ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS; } static void diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_context.c mesa-20.2.6/src/gallium/drivers/iris/iris_context.c --- mesa-20.2.1/src/gallium/drivers/iris/iris_context.c 2020-10-14 17:19:10.459850500 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_context.c 2020-12-16 21:42:03.734110000 +0000 @@ -185,6 +185,49 @@ out_value[1] = u.a.y[sample_index]; } +static bool +create_dirty_dmabuf_set(struct iris_context *ice) +{ + assert(ice->dirty_dmabufs == NULL); + + ice->dirty_dmabufs = _mesa_pointer_set_create(ice); + return ice->dirty_dmabufs != NULL; +} + +void +iris_mark_dirty_dmabuf(struct iris_context *ice, + struct pipe_resource *res) +{ + if (!_mesa_set_search(ice->dirty_dmabufs, res)) { + _mesa_set_add(ice->dirty_dmabufs, res); + pipe_reference(NULL, &res->reference); + } +} + +static void +clear_dirty_dmabuf_set(struct iris_context *ice) +{ + set_foreach(ice->dirty_dmabufs, entry) { + struct pipe_resource *res = (struct pipe_resource *)entry->key; + if (pipe_reference(&res->reference, NULL)) + res->screen->resource_destroy(res->screen, res); + } + + 
_mesa_set_clear(ice->dirty_dmabufs, NULL); +} + +void +iris_flush_dirty_dmabufs(struct iris_context *ice) +{ + set_foreach(ice->dirty_dmabufs, entry) { + struct pipe_resource *res = (struct pipe_resource *)entry->key; + ice->ctx.flush_resource(&ice->ctx, res); + } + + clear_dirty_dmabuf_set(ice); +} + + /** * Destroy a context, freeing any associated memory. */ @@ -197,6 +240,8 @@ if (ctx->stream_uploader) u_upload_destroy(ctx->stream_uploader); + clear_dirty_dmabuf_set(ice); + screen->vtbl.destroy_state(ice); iris_destroy_program_cache(ice); iris_destroy_border_color_pool(ice); @@ -261,6 +306,11 @@ } ctx->const_uploader = ctx->stream_uploader; + if (!create_dirty_dmabuf_set(ice)) { + ralloc_free(ice); + return NULL; + } + ctx->destroy = iris_destroy_context; ctx->set_debug_callback = iris_set_debug_callback; ctx->set_device_reset_callback = iris_set_device_reset_callback; diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_context.h mesa-20.2.6/src/gallium/drivers/iris/iris_context.h --- mesa-20.2.1/src/gallium/drivers/iris/iris_context.h 2020-10-14 17:19:10.459850500 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_context.h 2020-12-16 21:42:03.734110000 +0000 @@ -25,6 +25,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" +#include "util/set.h" #include "util/slab.h" #include "util/u_debug.h" #include "intel/blorp/blorp.h" @@ -536,6 +537,9 @@ /** A device reset status callback for notifying that the GPU is hosed. */ struct pipe_device_reset_callback reset; + /** A set of dmabuf resources dirtied beyond their default aux-states. */ + struct set *dirty_dmabufs; + /** Slab allocator for iris_transfer_map objects. 
*/ struct slab_child_pool transfer_pool; @@ -772,6 +776,10 @@ void iris_lost_context_state(struct iris_batch *batch); +void iris_mark_dirty_dmabuf(struct iris_context *ice, + struct pipe_resource *res); +void iris_flush_dirty_dmabufs(struct iris_context *ice); + void iris_init_blit_functions(struct pipe_context *ctx); void iris_init_clear_functions(struct pipe_context *ctx); void iris_init_program_functions(struct pipe_context *ctx); diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_fence.c mesa-20.2.6/src/gallium/drivers/iris/iris_fence.c --- mesa-20.2.1/src/gallium/drivers/iris/iris_fence.c 2020-10-14 17:19:10.459850500 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_fence.c 2020-12-16 21:42:03.735110300 +0000 @@ -154,7 +154,7 @@ if (syncobj != nth_syncobj) { *syncobj = *nth_syncobj; - memcpy(nth_fence, fence, sizeof(*fence)); + memcpy(fence, nth_fence, sizeof(*fence)); } } } @@ -241,6 +241,8 @@ } } + iris_flush_dirty_dmabufs(ice); + if (!deferred) { for (unsigned i = 0; i < IRIS_BATCH_COUNT; i++) iris_batch_flush(&ice->batches[i]); diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_genx_protos.h mesa-20.2.6/src/gallium/drivers/iris/iris_genx_protos.h --- mesa-20.2.1/src/gallium/drivers/iris/iris_genx_protos.h 2020-10-14 17:19:10.463183900 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_genx_protos.h 2020-12-16 21:42:03.735110300 +0000 @@ -29,6 +29,7 @@ /* iris_state.c */ void genX(init_state)(struct iris_context *ice); +void genX(init_screen_state)(struct iris_screen *screen); void genX(emit_hashing_mode)(struct iris_context *ice, struct iris_batch *batch, unsigned width, unsigned height, diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_query.c mesa-20.2.6/src/gallium/drivers/iris/iris_query.c --- mesa-20.2.1/src/gallium/drivers/iris/iris_query.c 2020-10-14 17:19:10.463183900 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_query.c 2020-12-16 21:42:03.737110100 +0000 @@ -484,6 +484,7 @@ iris_syncobj_reference(screen, &query->syncobj, NULL); 
screen->base.fence_reference(ctx->screen, &query->fence, NULL); } + pipe_resource_reference(&query->query_state_ref.res, NULL); free(query); } diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_resolve.c mesa-20.2.6/src/gallium/drivers/iris/iris_resolve.c --- mesa-20.2.1/src/gallium/drivers/iris/iris_resolve.c 2020-10-14 17:19:10.463183900 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_resolve.c 2020-12-16 21:42:03.737110100 +0000 @@ -94,7 +94,7 @@ while (views) { const int i = u_bit_scan(&views); struct iris_sampler_view *isv = shs->textures[i]; - struct iris_resource *res = (void *) isv->base.texture; + struct iris_resource *res = isv->res; if (res->base.target != PIPE_BUFFER) { if (consider_framebuffer) { @@ -808,6 +808,15 @@ ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS; } } + + if (res->mod_info && !res->mod_info->supports_clear_color) { + assert(res->mod_info->aux_usage != ISL_AUX_USAGE_NONE); + if (aux_state == ISL_AUX_STATE_CLEAR || + aux_state == ISL_AUX_STATE_COMPRESSED_CLEAR || + aux_state == ISL_AUX_STATE_PARTIAL_CLEAR) { + iris_mark_dirty_dmabuf(ice, &res->base); + } + } } enum isl_aux_usage diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_screen.c mesa-20.2.6/src/gallium/drivers/iris/iris_screen.c --- mesa-20.2.1/src/gallium/drivers/iris/iris_screen.c 2020-10-14 17:19:10.463183900 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_screen.c 2020-12-16 21:42:03.737110100 +0000 @@ -56,6 +56,24 @@ #include "intel/common/gen_l3_config.h" #include "iris_monitor.h" +#define genX_call(devinfo, func, ...) 
\ + switch (devinfo.gen) { \ + case 12: \ + gen12_##func(__VA_ARGS__); \ + break; \ + case 11: \ + gen11_##func(__VA_ARGS__); \ + break; \ + case 9: \ + gen9_##func(__VA_ARGS__); \ + break; \ + case 8: \ + gen8_##func(__VA_ARGS__); \ + break; \ + default: \ + unreachable("Unknown hardware generation"); \ + } + static void iris_flush_frontbuffer(struct pipe_screen *_screen, struct pipe_resource *resource, @@ -779,5 +797,7 @@ pscreen->get_driver_query_group_info = iris_get_monitor_group_info; pscreen->get_driver_query_info = iris_get_monitor_info; + genX_call(screen->devinfo, init_screen_state, screen); + return pscreen; } diff -Nru mesa-20.2.1/src/gallium/drivers/iris/iris_state.c mesa-20.2.6/src/gallium/drivers/iris/iris_state.c --- mesa-20.2.1/src/gallium/drivers/iris/iris_state.c 2020-10-14 17:19:10.463183900 +0000 +++ mesa-20.2.6/src/gallium/drivers/iris/iris_state.c 2020-12-16 21:42:03.739110200 +0000 @@ -7686,6 +7686,41 @@ } void +genX(init_screen_state)(struct iris_screen *screen) +{ + screen->vtbl.destroy_state = iris_destroy_state; + screen->vtbl.init_render_context = iris_init_render_context; + screen->vtbl.init_compute_context = iris_init_compute_context; + screen->vtbl.upload_render_state = iris_upload_render_state; + screen->vtbl.update_surface_base_address = iris_update_surface_base_address; + screen->vtbl.upload_compute_state = iris_upload_compute_state; + screen->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control; + screen->vtbl.emit_mi_report_perf_count = iris_emit_mi_report_perf_count; + screen->vtbl.rebind_buffer = iris_rebind_buffer; + screen->vtbl.load_register_reg32 = iris_load_register_reg32; + screen->vtbl.load_register_reg64 = iris_load_register_reg64; + screen->vtbl.load_register_imm32 = iris_load_register_imm32; + screen->vtbl.load_register_imm64 = iris_load_register_imm64; + screen->vtbl.load_register_mem32 = iris_load_register_mem32; + screen->vtbl.load_register_mem64 = iris_load_register_mem64; + screen->vtbl.store_register_mem32 = 
iris_store_register_mem32; + screen->vtbl.store_register_mem64 = iris_store_register_mem64; + screen->vtbl.store_data_imm32 = iris_store_data_imm32; + screen->vtbl.store_data_imm64 = iris_store_data_imm64; + screen->vtbl.copy_mem_mem = iris_copy_mem_mem; + screen->vtbl.derived_program_state_size = iris_derived_program_state_size; + screen->vtbl.store_derived_program_state = iris_store_derived_program_state; + screen->vtbl.create_so_decl_list = iris_create_so_decl_list; + screen->vtbl.populate_vs_key = iris_populate_vs_key; + screen->vtbl.populate_tcs_key = iris_populate_tcs_key; + screen->vtbl.populate_tes_key = iris_populate_tes_key; + screen->vtbl.populate_gs_key = iris_populate_gs_key; + screen->vtbl.populate_fs_key = iris_populate_fs_key; + screen->vtbl.populate_cs_key = iris_populate_cs_key; + screen->vtbl.lost_genx_state = iris_lost_genx_state; +} + +void genX(init_state)(struct iris_context *ice) { struct pipe_context *ctx = &ice->ctx; @@ -7731,37 +7766,6 @@ ctx->set_stream_output_targets = iris_set_stream_output_targets; ctx->set_frontend_noop = iris_set_frontend_noop; - screen->vtbl.destroy_state = iris_destroy_state; - screen->vtbl.init_render_context = iris_init_render_context; - screen->vtbl.init_compute_context = iris_init_compute_context; - screen->vtbl.upload_render_state = iris_upload_render_state; - screen->vtbl.update_surface_base_address = iris_update_surface_base_address; - screen->vtbl.upload_compute_state = iris_upload_compute_state; - screen->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control; - screen->vtbl.emit_mi_report_perf_count = iris_emit_mi_report_perf_count; - screen->vtbl.rebind_buffer = iris_rebind_buffer; - screen->vtbl.load_register_reg32 = iris_load_register_reg32; - screen->vtbl.load_register_reg64 = iris_load_register_reg64; - screen->vtbl.load_register_imm32 = iris_load_register_imm32; - screen->vtbl.load_register_imm64 = iris_load_register_imm64; - screen->vtbl.load_register_mem32 = iris_load_register_mem32; - 
screen->vtbl.load_register_mem64 = iris_load_register_mem64; - screen->vtbl.store_register_mem32 = iris_store_register_mem32; - screen->vtbl.store_register_mem64 = iris_store_register_mem64; - screen->vtbl.store_data_imm32 = iris_store_data_imm32; - screen->vtbl.store_data_imm64 = iris_store_data_imm64; - screen->vtbl.copy_mem_mem = iris_copy_mem_mem; - screen->vtbl.derived_program_state_size = iris_derived_program_state_size; - screen->vtbl.store_derived_program_state = iris_store_derived_program_state; - screen->vtbl.create_so_decl_list = iris_create_so_decl_list; - screen->vtbl.populate_vs_key = iris_populate_vs_key; - screen->vtbl.populate_tcs_key = iris_populate_tcs_key; - screen->vtbl.populate_tes_key = iris_populate_tes_key; - screen->vtbl.populate_gs_key = iris_populate_gs_key; - screen->vtbl.populate_fs_key = iris_populate_fs_key; - screen->vtbl.populate_cs_key = iris_populate_cs_key; - screen->vtbl.lost_genx_state = iris_lost_genx_state; - ice->state.dirty = ~0ull; ice->state.stage_dirty = ~0ull; diff -Nru mesa-20.2.1/src/gallium/drivers/llvmpipe/lp_setup_point.c mesa-20.2.6/src/gallium/drivers/llvmpipe/lp_setup_point.c --- mesa-20.2.1/src/gallium/drivers/llvmpipe/lp_setup_point.c 2020-10-14 17:19:10.473184000 +0000 +++ mesa-20.2.6/src/gallium/drivers/llvmpipe/lp_setup_point.c 2020-12-16 21:42:03.748110300 +0000 @@ -447,6 +447,10 @@ bbox.x1, bbox.y1); } + if (lp_context->active_statistics_queries) { + lp_context->pipeline_statistics.c_primitives++; + } + if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) { if (0) debug_printf("offscreen\n"); LP_COUNT(nr_culled_tris); @@ -469,10 +473,6 @@ LP_COUNT(nr_tris); - if (lp_context->active_statistics_queries) { - lp_context->pipeline_statistics.c_primitives++; - } - if (draw_will_inject_frontface(lp_context->draw) && setup->face_slot > 0) { point->inputs.frontfacing = v0[setup->face_slot][0]; diff -Nru mesa-20.2.1/src/gallium/drivers/panfrost/pan_job.c 
mesa-20.2.6/src/gallium/drivers/panfrost/pan_job.c --- mesa-20.2.1/src/gallium/drivers/panfrost/pan_job.c 2020-10-14 17:19:10.499851000 +0000 +++ mesa-20.2.6/src/gallium/drivers/panfrost/pan_job.c 2020-12-16 21:42:03.775110200 +0000 @@ -44,7 +44,7 @@ * better GPU utilization. * * Each accessed BO has a corresponding entry in the ->accessed_bos hash table. - * A BO is either being written or read at any time (see if writer != NULL). + * A BO is either being written or read at any time (see last_is_write). * When the last access is a write, the batch writing the BO might have read * dependencies (readers that have not been executed yet and want to read the * previous BO content), and when the last access is a read, all readers might @@ -58,6 +58,7 @@ struct panfrost_bo_access { struct util_dynarray readers; struct panfrost_batch_fence *writer; + bool last_is_write; }; static struct panfrost_batch_fence * @@ -397,7 +398,7 @@ entry = _mesa_hash_table_search(ctx->accessed_bos, bo); access = entry ? 
entry->data : NULL; if (access) { - old_writes = access->writer != NULL; + old_writes = access->last_is_write; } else { access = rzalloc(ctx, struct panfrost_bo_access); util_dynarray_init(&access->readers, access); @@ -477,7 +478,6 @@ util_dynarray_append(&access->readers, struct panfrost_batch_fence *, batch->out_sync); - access->writer = NULL; } } else { /* We already accessed this BO before, so we should already be @@ -502,6 +502,8 @@ if (access->writer) panfrost_batch_add_dep(batch, access->writer); } + + access->last_is_write = writes; } void diff -Nru mesa-20.2.1/src/gallium/drivers/r600/r600_pipe_common.c mesa-20.2.6/src/gallium/drivers/r600/r600_pipe_common.c --- mesa-20.2.1/src/gallium/drivers/r600/r600_pipe_common.c 2020-10-14 17:19:10.516517600 +0000 +++ mesa-20.2.6/src/gallium/drivers/r600/r600_pipe_common.c 2020-12-16 21:42:03.792110200 +0000 @@ -1286,10 +1286,6 @@ rscreen->family = rscreen->info.family; rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0); - int has_draw_use_llvm = debug_get_bool_option("DRAW_USE_LLVM", FALSE); - if (!has_draw_use_llvm) - setenv("DRAW_USE_LLVM", "no", 0); - r600_disk_cache_create(rscreen); diff -Nru mesa-20.2.1/src/gallium/drivers/r600/r600_state_common.c mesa-20.2.6/src/gallium/drivers/r600/r600_state_common.c --- mesa-20.2.1/src/gallium/drivers/r600/r600_state_common.c 2020-10-14 17:19:10.519851000 +0000 +++ mesa-20.2.6/src/gallium/drivers/r600/r600_state_common.c 2020-12-16 21:42:03.795110200 +0000 @@ -2062,7 +2062,7 @@ unsigned index_size = info->index_size; int index_bias; struct r600_shader_atomic combined_atomics[8]; - uint8_t atomic_used_mask; + uint8_t atomic_used_mask = 0; if (!info->indirect && !info->count && (index_size || !info->count_from_stream_output)) { return; diff -Nru mesa-20.2.1/src/gallium/drivers/radeon/radeon_vce_52.c mesa-20.2.6/src/gallium/drivers/radeon/radeon_vce_52.c --- 
mesa-20.2.1/src/gallium/drivers/radeon/radeon_vce_52.c 2020-10-14 17:19:10.529851200 +0000 +++ mesa-20.2.6/src/gallium/drivers/radeon/radeon_vce_52.c 2020-12-16 21:42:03.805110200 +0000 @@ -47,7 +47,17 @@ enc->enc_pic.rc.frame_rate_num = pic->rate_ctrl.frame_rate_num; enc->enc_pic.rc.frame_rate_den = pic->rate_ctrl.frame_rate_den; enc->enc_pic.rc.max_qp = 51; - enc->enc_pic.rc.vbv_buffer_size = pic->rate_ctrl.vbv_buffer_size; + + /* For CBR mode, to guarantee bitrate of generated stream complies with + * target bitrate (e.g. no over +/-10%), vbv_buffer_size should be same + * as target bitrate. + */ + if (enc->enc_pic.rc.rc_method == PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT) { + enc->enc_pic.rc.vbv_buffer_size = pic->rate_ctrl.target_bitrate; + } else { + enc->enc_pic.rc.vbv_buffer_size = pic->rate_ctrl.vbv_buffer_size; + } + enc->enc_pic.rc.vbv_buf_lv = pic->rate_ctrl.vbv_buf_lv; enc->enc_pic.rc.fill_data_enable = pic->rate_ctrl.fill_data_enable; enc->enc_pic.rc.enforce_hrd = pic->rate_ctrl.enforce_hrd; diff -Nru mesa-20.2.1/src/gallium/drivers/radeon/radeon_vce.c mesa-20.2.6/src/gallium/drivers/radeon/radeon_vce.c --- mesa-20.2.1/src/gallium/drivers/radeon/radeon_vce.c 2020-10-14 17:19:10.529851200 +0000 +++ mesa-20.2.6/src/gallium/drivers/radeon/radeon_vce.c 2020-12-16 21:42:03.804110300 +0000 @@ -265,7 +265,9 @@ enc->pic.quant_i_frames != pic->quant_i_frames || enc->pic.quant_p_frames != pic->quant_p_frames || enc->pic.quant_b_frames != pic->quant_b_frames || - enc->pic.rate_ctrl.target_bitrate != pic->rate_ctrl.target_bitrate; + enc->pic.rate_ctrl.target_bitrate != pic->rate_ctrl.target_bitrate || + enc->pic.rate_ctrl.frame_rate_num != pic->rate_ctrl.frame_rate_num || + enc->pic.rate_ctrl.frame_rate_den != pic->rate_ctrl.frame_rate_den; enc->pic = *pic; enc->si_get_pic_param(enc, pic); diff -Nru mesa-20.2.1/src/gallium/drivers/radeon/radeon_vcn_dec.c mesa-20.2.6/src/gallium/drivers/radeon/radeon_vcn_dec.c --- 
mesa-20.2.1/src/gallium/drivers/radeon/radeon_vcn_dec.c 2020-10-14 17:19:10.529851200 +0000 +++ mesa-20.2.6/src/gallium/drivers/radeon/radeon_vcn_dec.c 2020-12-16 21:42:03.805110200 +0000 @@ -397,7 +397,7 @@ for (i = 0; i < 3; ++i) prbs->seg.pred_probs[i] = pic->picture_parameter.segment_pred_probs[i]; - prbs->seg.abs_delta = 0; + prbs->seg.abs_delta = pic->picture_parameter.abs_delta; } else memset(&prbs->seg, 0, 256); @@ -1264,7 +1264,7 @@ : (4096 * 3000 * 3 / 2) * max_references; if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) - dpb_size *= (3 / 2); + dpb_size = dpb_size * 3 / 2; break; case PIPE_VIDEO_FORMAT_JPEG: diff -Nru mesa-20.2.1/src/gallium/drivers/radeon/radeon_vcn_enc.c mesa-20.2.6/src/gallium/drivers/radeon/radeon_vcn_enc.c --- mesa-20.2.1/src/gallium/drivers/radeon/radeon_vcn_enc.c 2020-10-14 17:19:10.529851200 +0000 +++ mesa-20.2.6/src/gallium/drivers/radeon/radeon_vcn_enc.c 2020-12-16 21:42:03.805110200 +0000 @@ -271,7 +271,9 @@ if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC) { struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture; need_rate_control = - enc->enc_pic.rc_layer_init.target_bit_rate != pic->rate_ctrl.target_bitrate; + (enc->enc_pic.rc_layer_init.target_bit_rate != pic->rate_ctrl.target_bitrate) || + (enc->enc_pic.rc_layer_init.frame_rate_num != pic->rate_ctrl.frame_rate_num) || + (enc->enc_pic.rc_layer_init.frame_rate_den != pic->rate_ctrl.frame_rate_den); } else if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) { struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; need_rate_control = enc->enc_pic.rc_layer_init.target_bit_rate != pic->rc.target_bitrate; diff -Nru mesa-20.2.1/src/gallium/drivers/radeonsi/si_gfx_cs.c mesa-20.2.6/src/gallium/drivers/radeonsi/si_gfx_cs.c --- mesa-20.2.1/src/gallium/drivers/radeonsi/si_gfx_cs.c 2020-10-14 17:19:10.533184500 +0000 +++ 
mesa-20.2.6/src/gallium/drivers/radeonsi/si_gfx_cs.c 2020-12-16 21:42:03.809110400 +0000 @@ -403,6 +403,10 @@ ctx->flags |= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE | SI_CONTEXT_INV_L2 | SI_CONTEXT_START_PIPELINE_STATS; + /* We don't know if the last draw call used GS fast launch, so assume it didn't. */ + if (ctx->ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL) + ctx->flags |= SI_CONTEXT_VGT_FLUSH; + radeon_add_to_buffer_list(ctx, ctx->gfx_cs, ctx->border_color_buffer, RADEON_USAGE_READ, RADEON_PRIO_BORDER_COLORS); if (ctx->shadowed_regs) { diff -Nru mesa-20.2.1/src/gallium/drivers/radeonsi/si_pipe.c mesa-20.2.6/src/gallium/drivers/radeonsi/si_pipe.c --- mesa-20.2.1/src/gallium/drivers/radeonsi/si_pipe.c 2020-10-14 17:19:10.533184500 +0000 +++ mesa-20.2.6/src/gallium/drivers/radeonsi/si_pipe.c 2020-12-16 21:42:03.810110300 +0000 @@ -477,25 +477,22 @@ if (!sctx->ctx) goto fail; + /* SDMA causes corruption on: : + * - RX 580: https://gitlab.freedesktop.org/mesa/mesa/-/issues/1399, 1889 + * - gfx9 APUs: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2814 + * - gfx10: https://gitlab.freedesktop.org/mesa/mesa/-/issues/1907, + https://gitlab.freedesktop.org/drm/amd/issues/892 + * + * While we could keep buffer copies and clears enabled, let's disable + * everything because SDMA decreases CPU performance because of its + * command submission overhead. + * + * And SDMA is disabled on all chips (instead of just the ones listed above), + * because it doesn't make sense to keep it enabled on old chips only + * that are not tested as often as newer chips. 
+ */ if (sscreen->info.num_rings[RING_DMA] && !(sscreen->debug_flags & DBG(NO_SDMA)) && - /* SDMA causes corruption on RX 580: - * https://gitlab.freedesktop.org/mesa/mesa/-/issues/1399 - * https://gitlab.freedesktop.org/mesa/mesa/-/issues/1889 - */ - (sctx->chip_class != GFX8 || sscreen->debug_flags & DBG(FORCE_SDMA)) && - /* SDMA causes corruption on gfx9 APUs: - * https://gitlab.freedesktop.org/mesa/mesa/-/issues/2814 - * - * While we could keep buffer copies and clears enabled, let's disable - * everything, because neither gfx8 nor gfx10 enable SDMA, and it's not - * easy to test. - */ - (sctx->chip_class != GFX9 || sscreen->debug_flags & DBG(FORCE_SDMA)) && - /* SDMA timeouts sometimes on gfx10 so disable it for now. See: - * https://bugs.freedesktop.org/show_bug.cgi?id=111481 - * https://gitlab.freedesktop.org/mesa/mesa/-/issues/1907 - */ - (sctx->chip_class != GFX10 || sscreen->debug_flags & DBG(FORCE_SDMA))) { + sscreen->debug_flags & DBG(FORCE_SDMA)) { sctx->sdma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA, (void *)si_flush_dma_cs, sctx, stop_exec_on_failure); } diff -Nru mesa-20.2.1/src/gallium/drivers/radeonsi/si_pm4.c mesa-20.2.6/src/gallium/drivers/radeonsi/si_pm4.c --- mesa-20.2.1/src/gallium/drivers/radeonsi/si_pm4.c 2020-10-14 17:19:10.533184500 +0000 +++ mesa-20.2.6/src/gallium/drivers/radeonsi/si_pm4.c 2020-12-16 21:42:03.810110300 +0000 @@ -38,6 +38,7 @@ { assert(state->ndw < SI_PM4_MAX_DW); state->pm4[state->ndw++] = dw; + state->last_opcode = -1; } static void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate) @@ -76,13 +77,15 @@ reg >>= 2; + assert(state->ndw + 2 <= SI_PM4_MAX_DW); + if (opcode != state->last_opcode || reg != (state->last_reg + 1)) { si_pm4_cmd_begin(state, opcode); - si_pm4_cmd_add(state, reg); + state->pm4[state->ndw++] = reg; } state->last_reg = reg; - si_pm4_cmd_add(state, val); + state->pm4[state->ndw++] = val; si_pm4_cmd_end(state, false); } diff -Nru mesa-20.2.1/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c 
mesa-20.2.6/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c --- mesa-20.2.1/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c 2020-10-14 17:19:10.536517900 +0000 +++ mesa-20.2.6/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c 2020-12-16 21:42:03.812110400 +0000 @@ -278,6 +278,7 @@ void *cs = ctx->create_compute_state(ctx, &state); ureg_destroy(ureg); + ureg_free_tokens(state.prog); return cs; } diff -Nru mesa-20.2.1/src/gallium/drivers/radeonsi/si_shader_nir.c mesa-20.2.6/src/gallium/drivers/radeonsi/si_shader_nir.c --- mesa-20.2.1/src/gallium/drivers/radeonsi/si_shader_nir.c 2020-10-14 17:19:10.536517900 +0000 +++ mesa-20.2.6/src/gallium/drivers/radeonsi/si_shader_nir.c 2020-12-16 21:42:03.812110400 +0000 @@ -288,6 +288,8 @@ case nir_intrinsic_bindless_image_atomic_xor: case nir_intrinsic_bindless_image_atomic_exchange: case nir_intrinsic_bindless_image_atomic_comp_swap: + case nir_intrinsic_bindless_image_atomic_inc_wrap: + case nir_intrinsic_bindless_image_atomic_dec_wrap: info->uses_bindless_images = true; info->writes_memory = true; info->num_memory_instructions++; /* we only care about stores */ diff -Nru mesa-20.2.1/src/gallium/drivers/radeonsi/si_state_draw.c mesa-20.2.6/src/gallium/drivers/radeonsi/si_state_draw.c --- mesa-20.2.1/src/gallium/drivers/radeonsi/si_state_draw.c 2020-10-14 17:19:10.536517900 +0000 +++ mesa-20.2.6/src/gallium/drivers/radeonsi/si_state_draw.c 2020-12-16 21:42:03.814110300 +0000 @@ -668,23 +668,25 @@ if (sctx->ngg) { if (sctx->tes_shader.cso) { ge_cntl = S_03096C_PRIM_GRP_SIZE(num_patches) | - S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */ + S_03096C_VERT_GRP_SIZE(0) | S_03096C_BREAK_WAVE_AT_EOI(key.u.tess_uses_prim_id); } else { ge_cntl = si_get_vs_state(sctx)->ge_cntl; } } else { unsigned primgroup_size; - unsigned vertgroup_size = 256; /* 256 = disable vertex grouping */ - ; + unsigned vertgroup_size; if (sctx->tes_shader.cso) { primgroup_size = num_patches; /* must be a multiple of NUM_PATCHES */ + 
vertgroup_size = 0; } else if (sctx->gs_shader.cso) { unsigned vgt_gs_onchip_cntl = sctx->gs_shader.current->ctx_reg.gs.vgt_gs_onchip_cntl; primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl); + vertgroup_size = G_028A44_ES_VERTS_PER_SUBGRP(vgt_gs_onchip_cntl); } else { primgroup_size = 128; /* recommended without a GS and tess */ + vertgroup_size = 0; } ge_cntl = S_03096C_PRIM_GRP_SIZE(primgroup_size) | S_03096C_VERT_GRP_SIZE(vertgroup_size) | @@ -1865,6 +1867,7 @@ } /* Update NGG culling settings. */ + uint8_t old_ngg_culling = sctx->ngg_culling; if (sctx->ngg && !dispatch_prim_discard_cs && rast_prim == PIPE_PRIM_TRIANGLES && !sctx->gs_shader.cso && /* GS doesn't support NGG culling. */ (sctx->screen->always_use_ngg_culling_all || @@ -1876,7 +1879,7 @@ (prim == PIPE_PRIM_TRIANGLES || prim == PIPE_PRIM_TRIANGLE_STRIP) && !sctx->tes_shader.cso)) && si_get_vs(sctx)->cso->ngg_culling_allowed) { - unsigned ngg_culling = 0; + uint8_t ngg_culling = 0; if (rs->rasterizer_discard) { ngg_culling |= SI_NGG_CULL_FRONT_FACE | SI_NGG_CULL_BACK_FACE; @@ -1905,22 +1908,33 @@ ngg_culling |= SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP; } - if (ngg_culling != sctx->ngg_culling) { - /* Insert a VGT_FLUSH when enabling fast launch changes to prevent hangs. - * See issues #2418, #2426, #2434 - */ - if (ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL) - sctx->flags |= SI_CONTEXT_VGT_FLUSH; + if (ngg_culling != old_ngg_culling) { + /* If shader compilation is not ready, this setting will be rejected. */ sctx->ngg_culling = ngg_culling; sctx->do_update_shaders = true; } - } else if (sctx->ngg_culling) { + } else if (old_ngg_culling) { sctx->ngg_culling = false; sctx->do_update_shaders = true; } - if (sctx->do_update_shaders && !si_update_shaders(sctx)) - goto return_cleanup; + if (unlikely(sctx->do_update_shaders)) { + if (unlikely(!si_update_shaders(sctx))) + goto return_cleanup; + + /* Insert a VGT_FLUSH when enabling fast launch changes to prevent hangs. 
+ * See issues #2418, #2426, #2434 + * + * This is the setting that is used by the draw. + */ + uint8_t ngg_culling = si_get_vs(sctx)->current->key.opt.ngg_culling; + if (!(old_ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL) && + ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL) + sctx->flags |= SI_CONTEXT_VGT_FLUSH; + + /* Set this to the correct value determined by si_update_shaders. */ + sctx->ngg_culling = ngg_culling; + } si_need_gfx_cs_space(sctx); diff -Nru mesa-20.2.1/src/gallium/drivers/radeonsi/si_state_shaders.c mesa-20.2.6/src/gallium/drivers/radeonsi/si_state_shaders.c --- mesa-20.2.1/src/gallium/drivers/radeonsi/si_state_shaders.c 2020-10-14 17:19:10.539851200 +0000 +++ mesa-20.2.6/src/gallium/drivers/radeonsi/si_state_shaders.c 2020-12-16 21:42:03.814110300 +0000 @@ -1128,7 +1128,10 @@ pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS((shader->config.num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) | S_00B228_FLOAT_MODE(shader->config.float_mode) | S_00B228_DX10_CLAMP(1) | - S_00B228_MEM_ORDERED(1) | S_00B228_WGP_MODE(1) | + S_00B228_MEM_ORDERED(1) | + /* Disable the WGP mode on gfx10.3 because it can hang. (it happened on VanGogh) + * Let's disable it on all chips that disable exactly 1 CU per SA for GS. */ + S_00B228_WGP_MODE(sscreen->info.chip_class == GFX10) | S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt)); si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0) | @@ -1243,7 +1246,7 @@ S_03096C_VERT_GRP_SIZE(shader->ngg.max_gsprims + 2); } else { shader->ge_cntl = S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) | - S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */ + S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts) | S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi); /* Bug workaround for a possible hang with non-tessellation cases. @@ -2954,8 +2957,13 @@ * VGT_FLUSH is also emitted at the beginning of IBs when legacy GS ring * pointers are set. 
*/ - if (sctx->chip_class == GFX10 && !new_ngg) + if ((sctx->chip_class == GFX10 || sctx->family == CHIP_SIENNA_CICHLID) && !new_ngg) { sctx->flags |= SI_CONTEXT_VGT_FLUSH; + if (sctx->chip_class == GFX10) { + /* Workaround for https://gitlab.freedesktop.org/mesa/mesa/-/issues/2941 */ + si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); + } + } sctx->ngg = new_ngg; sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */ diff -Nru mesa-20.2.1/src/gallium/drivers/softpipe/sp_tex_sample.c mesa-20.2.6/src/gallium/drivers/softpipe/sp_tex_sample.c --- mesa-20.2.1/src/gallium/drivers/softpipe/sp_tex_sample.c 2020-10-14 17:19:10.543184500 +0000 +++ mesa-20.2.6/src/gallium/drivers/softpipe/sp_tex_sample.c 2020-12-16 21:42:03.818110200 +0000 @@ -1638,7 +1638,7 @@ case PIPE_SWIZZLE_0: return 0.0; case PIPE_SWIZZLE_1: - return 1.0; + return sp_sview->oneval; default: return tx[chan][swizzle]; } @@ -2884,12 +2884,12 @@ float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { + struct sp_sampler_view *sp_sview = (struct sp_sampler_view *)sview; int j; const unsigned swizzle_r = sview->swizzle_r; const unsigned swizzle_g = sview->swizzle_g; const unsigned swizzle_b = sview->swizzle_b; const unsigned swizzle_a = sview->swizzle_a; - float oneval = util_format_is_pure_integer(sview->format) ? 
uif(1) : 1.0f; switch (swizzle_r) { case PIPE_SWIZZLE_0: @@ -2898,7 +2898,7 @@ break; case PIPE_SWIZZLE_1: for (j = 0; j < 4; j++) - out[0][j] = oneval; + out[0][j] = sp_sview->oneval; break; default: assert(swizzle_r < 4); @@ -2913,7 +2913,7 @@ break; case PIPE_SWIZZLE_1: for (j = 0; j < 4; j++) - out[1][j] = oneval; + out[1][j] = sp_sview->oneval; break; default: assert(swizzle_g < 4); @@ -2928,7 +2928,7 @@ break; case PIPE_SWIZZLE_1: for (j = 0; j < 4; j++) - out[2][j] = oneval; + out[2][j] = sp_sview->oneval; break; default: assert(swizzle_b < 4); @@ -2943,7 +2943,7 @@ break; case PIPE_SWIZZLE_1: for (j = 0; j < 4; j++) - out[3][j] = oneval; + out[3][j] = sp_sview->oneval; break; default: assert(swizzle_a < 4); @@ -3638,6 +3638,8 @@ sview->xpot = util_logbase2( resource->width0 ); sview->ypot = util_logbase2( resource->height0 ); + + sview->oneval = util_format_is_pure_integer(view->format) ? uif(1) : 1.0f; } return (struct pipe_sampler_view *) sview; diff -Nru mesa-20.2.1/src/gallium/drivers/softpipe/sp_tex_sample.h mesa-20.2.6/src/gallium/drivers/softpipe/sp_tex_sample.h --- mesa-20.2.1/src/gallium/drivers/softpipe/sp_tex_sample.h 2020-10-14 17:19:10.543184500 +0000 +++ mesa-20.2.6/src/gallium/drivers/softpipe/sp_tex_sample.h 2020-12-16 21:42:03.818110200 +0000 @@ -122,6 +122,8 @@ compute_lambda_func compute_lambda; compute_lambda_from_grad_func compute_lambda_from_grad; union pipe_color_union border_color; + /* Value to use for PIPE_SWIZZLE_1 (integer vs float) */ + float oneval; }; struct sp_filter_funcs { diff -Nru mesa-20.2.1/src/gallium/drivers/svga/svga_resource_buffer_upload.c mesa-20.2.6/src/gallium/drivers/svga/svga_resource_buffer_upload.c --- mesa-20.2.1/src/gallium/drivers/svga/svga_resource_buffer_upload.c 2020-10-14 17:19:10.546517800 +0000 +++ mesa-20.2.6/src/gallium/drivers/svga/svga_resource_buffer_upload.c 2020-12-16 21:42:03.823110300 +0000 @@ -229,12 +229,12 @@ /* Add the new surface to the buffer surface list */ ret = 
svga_buffer_add_host_surface(sbuf, sbuf->handle, &sbuf->key, bind_flags); - } - if (ss->sws->have_gb_objects) { - /* Initialize the surface with zero */ - ss->sws->surface_init(ss->sws, sbuf->handle, svga_surface_size(&sbuf->key), - sbuf->key.flags); + if (ss->sws->have_gb_objects) { + /* Initialize the surface with zero */ + ss->sws->surface_init(ss->sws, sbuf->handle, svga_surface_size(&sbuf->key), + sbuf->key.flags); + } } return ret; diff -Nru mesa-20.2.1/src/gallium/drivers/vc4/vc4_blit.c mesa-20.2.6/src/gallium/drivers/vc4/vc4_blit.c --- mesa-20.2.1/src/gallium/drivers/vc4/vc4_blit.c 2020-10-14 17:19:10.573184700 +0000 +++ mesa-20.2.6/src/gallium/drivers/vc4/vc4_blit.c 2020-12-16 21:42:03.848110400 +0000 @@ -299,6 +299,7 @@ nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL); load->src[0] = nir_src_for_ssa(one); load->src[1] = nir_src_for_ssa(nir_iadd(&b, x_offset, y_offset)); + nir_intrinsic_set_align(load, 4, 0); nir_builder_instr_insert(&b, &load->instr); nir_store_var(&b, color_out, diff -Nru mesa-20.2.1/src/gallium/drivers/vc4/vc4_program.c mesa-20.2.6/src/gallium/drivers/vc4/vc4_program.c --- mesa-20.2.1/src/gallium/drivers/vc4/vc4_program.c 2020-10-14 17:19:10.576518000 +0000 +++ mesa-20.2.6/src/gallium/drivers/vc4/vc4_program.c 2020-12-16 21:42:03.850110300 +0000 @@ -2472,7 +2472,8 @@ if (s->info.stage == MESA_SHADER_VERTEX) NIR_PASS_V(s, nir_lower_point_size, 1.0f, 0.0f); - NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, + NIR_PASS_V(s, nir_lower_io, + nir_var_shader_in | nir_var_shader_out | nir_var_uniform, type_size, (nir_lower_io_options)0); NIR_PASS_V(s, nir_lower_regs_to_ssa); diff -Nru mesa-20.2.1/src/gallium/drivers/zink/zink_blit.c mesa-20.2.6/src/gallium/drivers/zink/zink_blit.c --- mesa-20.2.1/src/gallium/drivers/zink/zink_blit.c 2020-10-14 17:19:10.579851600 +0000 +++ mesa-20.2.6/src/gallium/drivers/zink/zink_blit.c 2020-12-16 21:42:03.855110400 +0000 @@ -43,19 +43,35 @@ 
region.srcSubresource.aspectMask = src->aspect; region.srcSubresource.mipLevel = info->src.level; - region.srcSubresource.baseArrayLayer = 0; // no clue - region.srcSubresource.layerCount = 1; // no clue region.srcOffset.x = info->src.box.x; region.srcOffset.y = info->src.box.y; - region.srcOffset.z = info->src.box.z; + + if (src->base.array_size > 1) { + region.srcOffset.z = 0; + region.srcSubresource.baseArrayLayer = info->src.box.z; + region.srcSubresource.layerCount = info->src.box.depth; + } else { + assert(info->src.box.depth == 1); + region.srcOffset.z = info->src.box.z; + region.srcSubresource.baseArrayLayer = 0; + region.srcSubresource.layerCount = 1; + } region.dstSubresource.aspectMask = dst->aspect; region.dstSubresource.mipLevel = info->dst.level; - region.dstSubresource.baseArrayLayer = 0; // no clue - region.dstSubresource.layerCount = 1; // no clue region.dstOffset.x = info->dst.box.x; region.dstOffset.y = info->dst.box.y; - region.dstOffset.z = info->dst.box.z; + + if (dst->base.array_size > 1) { + region.dstOffset.z = 0; + region.dstSubresource.baseArrayLayer = info->dst.box.z; + region.dstSubresource.layerCount = info->dst.box.depth; + } else { + assert(info->dst.box.depth == 1); + region.dstOffset.z = info->dst.box.z; + region.dstSubresource.baseArrayLayer = 0; + region.dstSubresource.layerCount = 1; + } region.extent.width = info->dst.box.width; region.extent.height = info->dst.box.height; diff -Nru mesa-20.2.1/src/gallium/drivers/zink/zink_resource.c mesa-20.2.6/src/gallium/drivers/zink/zink_resource.c --- mesa-20.2.1/src/gallium/drivers/zink/zink_resource.c 2020-10-14 17:19:10.583185000 +0000 +++ mesa-20.2.6/src/gallium/drivers/zink/zink_resource.c 2020-12-16 21:42:03.857110500 +0000 @@ -523,9 +523,12 @@ vkGetImageSubresourceLayout(screen->dev, res->image, &isr, &srl); trans->base.stride = srl.rowPitch; trans->base.layer_stride = srl.arrayPitch; - ptr = ((uint8_t *)ptr) + box->z * srl.depthPitch + - box->y * srl.rowPitch + - box->x; + const 
struct util_format_description *desc = util_format_description(res->base.format); + unsigned offset = srl.offset + + box->z * srl.depthPitch + + (box->y / desc->block.height) * srl.rowPitch + + (box->x / desc->block.width) * (desc->block.bits / 8); + ptr = ((uint8_t *)ptr) + offset; } } diff -Nru mesa-20.2.1/src/gallium/drivers/zink/zink_screen.c mesa-20.2.6/src/gallium/drivers/zink/zink_screen.c --- mesa-20.2.1/src/gallium/drivers/zink/zink_screen.c 2020-10-14 17:19:10.583185000 +0000 +++ mesa-20.2.6/src/gallium/drivers/zink/zink_screen.c 2020-12-16 21:42:03.857110500 +0000 @@ -415,7 +415,8 @@ return MIN2(screen->props.limits.maxUniformBufferRange, INT_MAX); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return screen->props.limits.maxPerStageDescriptorUniformBuffers; + return MIN2(screen->props.limits.maxPerStageDescriptorUniformBuffers, + PIPE_MAX_CONSTANT_BUFFERS); case PIPE_SHADER_CAP_MAX_TEMPS: return INT_MAX; diff -Nru mesa-20.2.1/src/gallium/frontends/dri/dri_screen.c mesa-20.2.6/src/gallium/frontends/dri/dri_screen.c --- mesa-20.2.1/src/gallium/frontends/dri/dri_screen.c 2020-10-14 17:19:10.586518300 +0000 +++ mesa-20.2.6/src/gallium/frontends/dri/dri_screen.c 2020-12-16 21:42:03.862110400 +0000 @@ -73,6 +73,8 @@ driQueryOptionb(optionCache, "force_glsl_extensions_warn"); options->force_glsl_version = driQueryOptioni(optionCache, "force_glsl_version"); + options->allow_extra_pp_tokens = + driQueryOptionb(optionCache, "allow_extra_pp_tokens"); options->allow_glsl_extension_directive_midshader = driQueryOptionb(optionCache, "allow_glsl_extension_directive_midshader"); options->allow_glsl_120_subset_in_110 = diff -Nru mesa-20.2.1/src/gallium/frontends/va/config.c mesa-20.2.6/src/gallium/frontends/va/config.c --- mesa-20.2.1/src/gallium/frontends/va/config.c 2020-10-14 17:19:10.596518300 +0000 +++ mesa-20.2.6/src/gallium/frontends/va/config.c 2020-12-16 21:42:03.873110300 +0000 @@ -99,7 +99,7 @@ PIPE_VIDEO_CAP_SUPPORTED)) entrypoint_list[(*num_entrypoints)++] = 
VAEntrypointEncSlice; - if (num_entrypoints == 0) + if (*num_entrypoints == 0) return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; assert(*num_entrypoints <= ctx->max_entrypoints); diff -Nru mesa-20.2.1/src/gallium/frontends/va/picture_vp9.c mesa-20.2.6/src/gallium/frontends/va/picture_vp9.c --- mesa-20.2.1/src/gallium/frontends/va/picture_vp9.c 2020-10-14 17:19:10.596518300 +0000 +++ mesa-20.2.6/src/gallium/frontends/va/picture_vp9.c 2020-12-16 21:42:03.874110500 +0000 @@ -339,7 +339,7 @@ /* update_data */ if (vp9_u(&vlc, 1)) { /* abs_delta */ - vp9_u(&vlc, 1); + context->desc.vp9.picture_parameter.abs_delta = vp9_u(&vlc, 1); for (i = 0; i < 8; ++i) { /* Use alternate quantizer */ if ((context->desc.vp9.slice_parameter.seg_param[i].alt_quant_enabled = vp9_u(&vlc, 1))) diff -Nru mesa-20.2.1/src/gallium/frontends/va/postproc.c mesa-20.2.6/src/gallium/frontends/va/postproc.c --- mesa-20.2.1/src/gallium/frontends/va/postproc.c 2020-10-14 17:19:10.596518300 +0000 +++ mesa-20.2.6/src/gallium/frontends/va/postproc.c 2020-12-16 21:42:03.874110500 +0000 @@ -321,7 +321,7 @@ VAProcFilterParameterBufferDeinterlacing *deint = buf->data; switch (deint->algorithm) { case VAProcDeinterlacingBob: - if (deint->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST) + if (deint->flags & VA_DEINTERLACING_BOTTOM_FIELD) deinterlace = VL_COMPOSITOR_BOB_BOTTOM; else deinterlace = VL_COMPOSITOR_BOB_TOP; @@ -333,7 +333,7 @@ case VAProcDeinterlacingMotionAdaptive: src = vlVaApplyDeint(drv, context, param, src, - !!(deint->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST)); + !!(deint->flags & VA_DEINTERLACING_BOTTOM_FIELD)); break; default: diff -Nru mesa-20.2.1/src/gallium/frontends/vdpau/mixer.c mesa-20.2.6/src/gallium/frontends/vdpau/mixer.c --- mesa-20.2.1/src/gallium/frontends/vdpau/mixer.c 2020-10-14 17:19:10.599851600 +0000 +++ mesa-20.2.6/src/gallium/frontends/vdpau/mixer.c 2020-12-16 21:42:03.874110500 +0000 @@ -143,7 +143,7 @@ } ret = VDP_STATUS_INVALID_VALUE; if (vmixer->max_layers > 4) { - 
VDPAU_MSG(VDPAU_WARN, "[VDPAU] Max layers > 4 not supported\n", vmixer->max_layers); + VDPAU_MSG(VDPAU_WARN, "[VDPAU] Max layers %u > 4 not supported\n", vmixer->max_layers); goto no_params; } diff -Nru mesa-20.2.1/src/gallium/include/frontend/api.h mesa-20.2.6/src/gallium/include/frontend/api.h --- mesa-20.2.1/src/gallium/include/frontend/api.h 2020-10-14 17:19:10.603185000 +0000 +++ mesa-20.2.6/src/gallium/include/frontend/api.h 2020-12-16 21:42:03.878110400 +0000 @@ -220,6 +220,7 @@ bool disable_arb_gpu_shader5; bool force_glsl_extensions_warn; unsigned force_glsl_version; + bool allow_extra_pp_tokens; bool allow_glsl_extension_directive_midshader; bool allow_glsl_120_subset_in_110; bool allow_glsl_builtin_const_expression; diff -Nru mesa-20.2.1/src/gallium/include/pipe/p_video_state.h mesa-20.2.6/src/gallium/include/pipe/p_video_state.h --- mesa-20.2.1/src/gallium/include/pipe/p_video_state.h 2020-10-14 17:19:10.603185000 +0000 +++ mesa-20.2.6/src/gallium/include/pipe/p_video_state.h 2020-12-16 21:42:03.879110300 +0000 @@ -764,6 +764,7 @@ int8_t y_dc_delta_q; int8_t uv_ac_delta_q; int8_t uv_dc_delta_q; + uint8_t abs_delta; } picture_parameter; struct { diff -Nru mesa-20.2.1/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c mesa-20.2.6/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c --- mesa-20.2.1/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 2020-10-14 17:19:10.609851800 +0000 +++ mesa-20.2.6/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 2020-12-16 21:42:03.886110500 +0000 @@ -810,7 +810,6 @@ ib_size = ib->big_ib_buffer->size - ib->used_ib_space; ib->base.current.max_dw = ib_size / 4 - amdgpu_cs_epilog_dws(cs); - assert(ib->base.current.max_dw >= ib->max_check_space_size / 4); ib->base.gpu_address = info->va_start; return true; } @@ -1178,7 +1177,6 @@ ib->base.current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space); ib->base.current.max_dw = ib->big_ib_buffer->size / 4 - cs_epilog_dw; - assert(ib->base.current.max_dw >= ib->max_check_space_size / 4); ib->base.gpu_address = va; 
amdgpu_cs_add_buffer(&cs->main.base, ib->big_ib_buffer, diff -Nru mesa-20.2.1/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c mesa-20.2.6/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c --- mesa-20.2.1/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 2020-10-14 17:19:10.613185200 +0000 +++ mesa-20.2.6/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 2020-12-16 21:42:03.889110300 +0000 @@ -530,6 +530,8 @@ } } + ws->info.num_se = ws->info.max_se; + radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL, &ws->info.max_sh_per_se); if (ws->gen == DRV_SI) { diff -Nru mesa-20.2.1/src/gallium/winsys/svga/drm/vmw_surface.c mesa-20.2.6/src/gallium/winsys/svga/drm/vmw_surface.c --- mesa-20.2.1/src/gallium/winsys/svga/drm/vmw_surface.c 2020-10-14 17:19:10.613185200 +0000 +++ mesa-20.2.6/src/gallium/winsys/svga/drm/vmw_surface.c 2020-12-16 21:42:03.890110500 +0000 @@ -44,15 +44,15 @@ struct pb_buffer *pb_buf; uint32_t pb_flags; struct vmw_winsys_screen *vws = vsrf->screen; - pb_flags = PIPE_TRANSFER_READ_WRITE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + pb_flags = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; struct pb_manager *provider; struct pb_desc desc; - data = vmw_svga_winsys_buffer_map(&vws->base, vsrf->buf, - PIPE_TRANSFER_DONTBLOCK | pb_flags); + mtx_lock(&vsrf->mutex); + data = vmw_svga_winsys_buffer_map(&vws->base, vsrf->buf, pb_flags); if (data) - goto out_unlock; + goto out_mapped; provider = vws->pools.mob_fenced; memset(&desc, 0, sizeof(desc)); @@ -64,24 +64,25 @@ data = vmw_svga_winsys_buffer_map(&vws->base, vbuf, pb_flags); if (data) { - if (vsrf->buf) { + vsrf->rebind = TRUE; + if (vsrf->buf) vmw_svga_winsys_buffer_destroy(&vws->base, vsrf->buf); - vsrf->buf = vbuf; - goto out_unlock; - } else - vmw_svga_winsys_buffer_destroy(&vws->base, vbuf); + vsrf->buf = vbuf; + goto out_mapped; + } else { + vmw_svga_winsys_buffer_destroy(&vws->base, vbuf); + goto out_unlock; } } - - data = vmw_svga_winsys_buffer_map(&vws->base, vsrf->buf, 
pb_flags); - if (data == NULL) + else { + /* Cannot create a buffer, just unlock */ goto out_unlock; + } -out_unlock: +out_mapped: mtx_unlock(&vsrf->mutex); - if (data) - { + if (data) { if (flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT) { memset(data, 0, surf_size + sizeof(SVGA3dDXSOState)); } @@ -89,8 +90,10 @@ memset(data, 0, surf_size); } } + mtx_lock(&vsrf->mutex); vmw_svga_winsys_buffer_unmap(&vsrf->screen->base, vsrf->buf); +out_unlock: mtx_unlock(&vsrf->mutex); } diff -Nru mesa-20.2.1/src/intel/blorp/blorp_blit.c mesa-20.2.6/src/intel/blorp/blorp_blit.c --- mesa-20.2.1/src/intel/blorp/blorp_blit.c 2020-10-14 17:19:10.636518700 +0000 +++ mesa-20.2.6/src/intel/blorp/blorp_blit.c 2020-12-16 21:42:03.913110500 +0000 @@ -56,7 +56,7 @@ LOAD_INPUT(discard_rect, glsl_vec4_type()) LOAD_INPUT(rect_grid, glsl_vec4_type()) LOAD_INPUT(coord_transform, glsl_vec4_type()) - LOAD_INPUT(src_z, glsl_uint_type()) + LOAD_INPUT(src_z, glsl_float_type()) LOAD_INPUT(src_offset, glsl_vector_type(GLSL_TYPE_UINT, 2)) LOAD_INPUT(dst_offset, glsl_vector_type(GLSL_TYPE_UINT, 2)) LOAD_INPUT(src_inv_size, glsl_vector_type(GLSL_TYPE_FLOAT, 2)) @@ -154,8 +154,13 @@ * more explicit in the future. 
*/ assert(pos->num_components >= 2); - pos = nir_vec3(b, nir_channel(b, pos, 0), nir_channel(b, pos, 1), - nir_load_var(b, v->v_src_z)); + if (op == nir_texop_txf || op == nir_texop_txf_ms || op == nir_texop_txf_ms_mcs) { + pos = nir_vec3(b, nir_channel(b, pos, 0), nir_channel(b, pos, 1), + nir_f2i32(b, nir_load_var(b, v->v_src_z))); + } else { + pos = nir_vec3(b, nir_channel(b, pos, 0), nir_channel(b, pos, 1), + nir_load_var(b, v->v_src_z)); + } tex->src[0].src_type = nir_tex_src_coord; tex->src[0].src = nir_src_for_ssa(pos); @@ -2319,7 +2324,7 @@ void blorp_blit(struct blorp_batch *batch, const struct blorp_surf *src_surf, - unsigned src_level, unsigned src_layer, + unsigned src_level, float src_layer, enum isl_format src_format, struct isl_swizzle src_swizzle, const struct blorp_surf *dst_surf, unsigned dst_level, unsigned dst_layer, diff -Nru mesa-20.2.1/src/intel/blorp/blorp.c mesa-20.2.6/src/intel/blorp/blorp.c --- mesa-20.2.1/src/intel/blorp/blorp.c 2020-10-14 17:19:10.636518700 +0000 +++ mesa-20.2.6/src/intel/blorp/blorp.c 2020-12-16 21:42:03.912110600 +0000 @@ -63,7 +63,7 @@ brw_blorp_surface_info_init(struct blorp_context *blorp, struct brw_blorp_surface_info *info, const struct blorp_surf *surf, - unsigned int level, unsigned int layer, + unsigned int level, float layer, enum isl_format format, bool is_render_target) { memset(info, 0, sizeof(*info)); diff -Nru mesa-20.2.1/src/intel/blorp/blorp.h mesa-20.2.6/src/intel/blorp/blorp.h --- mesa-20.2.1/src/intel/blorp/blorp.h 2020-10-14 17:19:10.636518700 +0000 +++ mesa-20.2.6/src/intel/blorp/blorp.h 2020-12-16 21:42:03.912110600 +0000 @@ -133,7 +133,7 @@ void blorp_blit(struct blorp_batch *batch, const struct blorp_surf *src_surf, - unsigned src_level, unsigned src_layer, + unsigned src_level, float src_layer, enum isl_format src_format, struct isl_swizzle src_swizzle, const struct blorp_surf *dst_surf, unsigned dst_level, unsigned dst_layer, diff -Nru mesa-20.2.1/src/intel/blorp/blorp_priv.h 
mesa-20.2.6/src/intel/blorp/blorp_priv.h --- mesa-20.2.1/src/intel/blorp/blorp_priv.h 2020-10-14 17:19:10.636518700 +0000 +++ mesa-20.2.6/src/intel/blorp/blorp_priv.h 2020-12-16 21:42:03.913110500 +0000 @@ -61,7 +61,7 @@ struct isl_view view; /* Z offset into a 3-D texture or slice of a 2-D array texture. */ - uint32_t z_offset; + float z_offset; uint32_t tile_x_sa, tile_y_sa; }; @@ -70,7 +70,7 @@ brw_blorp_surface_info_init(struct blorp_context *blorp, struct brw_blorp_surface_info *info, const struct blorp_surf *surf, - unsigned int level, unsigned int layer, + unsigned int level, float layer, enum isl_format format, bool is_render_target); void blorp_surf_convert_to_single_slice(const struct isl_device *isl_dev, @@ -148,7 +148,7 @@ /* Minimum layer setting works for all the textures types but texture_3d * for which the setting has no effect. Use the z-coordinate instead. */ - uint32_t src_z; + float src_z; /* Pad out to an integral number of registers */ uint32_t pad[1]; diff -Nru mesa-20.2.1/src/intel/compiler/brw_fs.cpp mesa-20.2.6/src/intel/compiler/brw_fs.cpp --- mesa-20.2.1/src/intel/compiler/brw_fs.cpp 2020-10-14 17:19:10.643185400 +0000 +++ mesa-20.2.6/src/intel/compiler/brw_fs.cpp 2020-12-16 21:42:03.924110400 +0000 @@ -4987,21 +4987,38 @@ */ ubld1.MOV(component(header, 3), sampler_handle); } else if (is_high_sampler(devinfo, sampler)) { + fs_reg sampler_state_ptr = + retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD); + + /* Gen11+ sampler message headers include bits in 4:0 which conflict + * with the ones included in g0.3 bits 4:0. Mask them out. 
+ */ + if (devinfo->gen >= 11) { + sampler_state_ptr = ubld1.vgrf(BRW_REGISTER_TYPE_UD); + ubld1.AND(sampler_state_ptr, + retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD), + brw_imm_ud(INTEL_MASK(31, 5))); + } + if (sampler.file == BRW_IMMEDIATE_VALUE) { assert(sampler.ud >= 16); const int sampler_state_size = 16; /* 16 bytes */ - ubld1.ADD(component(header, 3), - retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD), + ubld1.ADD(component(header, 3), sampler_state_ptr, brw_imm_ud(16 * (sampler.ud / 16) * sampler_state_size)); } else { fs_reg tmp = ubld1.vgrf(BRW_REGISTER_TYPE_UD); ubld1.AND(tmp, sampler, brw_imm_ud(0x0f0)); ubld1.SHL(tmp, tmp, brw_imm_ud(4)); - ubld1.ADD(component(header, 3), - retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD), - tmp); + ubld1.ADD(component(header, 3), sampler_state_ptr, tmp); } + } else if (devinfo->gen >= 11) { + /* Gen11+ sampler message headers include bits in 4:0 which conflict + * with the ones included in g0.3 bits 4:0. Mask them out. + */ + ubld1.AND(component(header, 3), + retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD), + brw_imm_ud(INTEL_MASK(31, 5))); } } @@ -7606,7 +7623,8 @@ static const fs_inst * find_halt_control_flow_region_start(const fs_visitor *v) { - if (brw_wm_prog_data(v->prog_data)->uses_kill) { + if (v->stage == MESA_SHADER_FRAGMENT && + brw_wm_prog_data(v->prog_data)->uses_kill) { foreach_block_and_inst(block, fs_inst, inst, v->cfg) { if (inst->opcode == FS_OPCODE_DISCARD_JUMP || inst->opcode == FS_OPCODE_PLACEHOLDER_HALT) diff -Nru mesa-20.2.1/src/intel/compiler/brw_shader.cpp mesa-20.2.6/src/intel/compiler/brw_shader.cpp --- mesa-20.2.1/src/intel/compiler/brw_shader.cpp 2020-10-14 17:19:10.649852000 +0000 +++ mesa-20.2.6/src/intel/compiler/brw_shader.cpp 2020-12-16 21:42:03.935110600 +0000 @@ -924,6 +924,8 @@ case BRW_OPCODE_CBIT: case BRW_OPCODE_FBH: case BRW_OPCODE_FBL: + case BRW_OPCODE_ROL: + case BRW_OPCODE_ROR: case BRW_OPCODE_SUBB: case SHADER_OPCODE_BROADCAST: case SHADER_OPCODE_CLUSTER_BROADCAST: 
diff -Nru mesa-20.2.1/src/intel/dev/gen_device_info.h mesa-20.2.6/src/intel/dev/gen_device_info.h --- mesa-20.2.1/src/intel/dev/gen_device_info.h 2020-10-14 17:19:10.653185400 +0000 +++ mesa-20.2.6/src/intel/dev/gen_device_info.h 2020-12-16 21:42:03.940110400 +0000 @@ -38,7 +38,7 @@ #define GEN_DEVICE_MAX_SLICES (6) /* Maximum on gen10 */ #define GEN_DEVICE_MAX_SUBSLICES (8) /* Maximum on gen11 */ -#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (10) /* Maximum on Haswell */ +#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gen12 */ #define GEN_DEVICE_MAX_PIXEL_PIPES (2) /* Maximum on gen11 */ /** diff -Nru mesa-20.2.1/src/intel/isl/isl.c mesa-20.2.6/src/intel/isl/isl.c --- mesa-20.2.1/src/intel/isl/isl.c 2020-10-14 17:19:10.659852000 +0000 +++ mesa-20.2.6/src/intel/isl/isl.c 2020-12-16 21:42:03.944110400 +0000 @@ -2969,7 +2969,7 @@ case ISL_FORMAT_R32_SINT: return 0x12; case ISL_FORMAT_R32_UINT: return 0x13; case ISL_FORMAT_R32_FLOAT: return 0x11; - case ISL_FORMAT_R24_UNORM_X8_TYPELESS: return 0x11; + case ISL_FORMAT_R24_UNORM_X8_TYPELESS: return 0x13; case ISL_FORMAT_B5G6R5_UNORM: return 0xA; case ISL_FORMAT_B5G6R5_UNORM_SRGB: return 0xA; case ISL_FORMAT_B5G5R5A1_UNORM: return 0xA; diff -Nru mesa-20.2.1/src/intel/isl/isl_format.c mesa-20.2.6/src/intel/isl/isl_format.c --- mesa-20.2.1/src/intel/isl/isl_format.c 2020-10-14 17:19:10.659852000 +0000 +++ mesa-20.2.6/src/intel/isl/isl_format.c 2020-12-16 21:42:03.945110600 +0000 @@ -1272,7 +1272,6 @@ switch (layout->type) { case ISL_UNORM: - unpacked.f32 = _mesa_unorm_to_float(packed, layout->bits); if (colorspace == ISL_COLORSPACE_SRGB) { if (layout->bits == 8) { unpacked.f32 = util_format_srgb_8unorm_to_linear_float(packed); diff -Nru mesa-20.2.1/src/intel/tools/i965_gram.y mesa-20.2.6/src/intel/tools/i965_gram.y --- mesa-20.2.1/src/intel/tools/i965_gram.y 2020-10-14 17:19:10.673185600 +0000 +++ mesa-20.2.6/src/intel/tools/i965_gram.y 2020-12-16 21:42:03.959110500 +0000 @@ -2185,7 +2185,7 @@ | LPAREN exp2 RPAREN { 
if ($2 > 32 || !isPowerofTwo($2)) - error(&@2, "Invalid execution size %d\n", $2); + error(&@2, "Invalid execution size %llu\n", $2); $$ = cvt($2) - 1; } diff -Nru mesa-20.2.1/src/intel/vulkan/anv_blorp.c mesa-20.2.6/src/intel/vulkan/anv_blorp.c --- mesa-20.2.1/src/intel/vulkan/anv_blorp.c 2020-10-14 17:19:10.689852200 +0000 +++ mesa-20.2.6/src/intel/vulkan/anv_blorp.c 2020-12-16 21:42:03.976110500 +0000 @@ -709,12 +709,19 @@ } bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end); - float src_z_step = (float)(src_end + 1 - src_start) / - (float)(dst_end + 1 - dst_start); + const unsigned num_layers = dst_end - dst_start; + float src_z_step = (float)(src_end - src_start) / (float)num_layers; + + /* There is no interpolation to the pixel center during rendering, so + * add the 0.5 offset ourselves here. */ + float depth_center_offset = 0; + if (src_image->type == VK_IMAGE_TYPE_3D) + depth_center_offset = 0.5 / num_layers * (src_end - src_start); if (flip_z) { src_start = src_end; src_z_step *= -1; + depth_center_offset *= -1; } unsigned src_x0 = pRegions[r].srcOffsets[0].x; @@ -729,7 +736,6 @@ unsigned dst_y1 = pRegions[r].dstOffsets[1].y; bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1); - const unsigned num_layers = dst_end - dst_start; anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, 1U << aspect_bit, dst.aux_usage, @@ -738,7 +744,7 @@ for (unsigned i = 0; i < num_layers; i++) { unsigned dst_z = dst_start + i; - unsigned src_z = src_start + i * src_z_step; + float src_z = src_start + i * src_z_step + depth_center_offset; blorp_blit(&batch, &src, src_res->mipLevel, src_z, src_format.isl_format, src_format.swizzle, diff -Nru mesa-20.2.1/src/intel/vulkan/anv_descriptor_set.c mesa-20.2.6/src/intel/vulkan/anv_descriptor_set.c --- mesa-20.2.1/src/intel/vulkan/anv_descriptor_set.c 2020-10-14 17:19:10.689852200 +0000 +++ mesa-20.2.6/src/intel/vulkan/anv_descriptor_set.c 2020-12-16 21:42:03.976110500 +0000 @@ -800,8 +800,10 @@ 
anv_descriptor_set_layout_unref(device, set->layout); } - if (pool->bo) + if (pool->bo) { + util_vma_heap_finish(&pool->bo_heap); anv_device_release_bo(device, pool->bo); + } anv_state_stream_finish(&pool->surface_state_stream); vk_object_base_finish(&pool->base); diff -Nru mesa-20.2.1/src/intel/vulkan/anv_formats.c mesa-20.2.6/src/intel/vulkan/anv_formats.c --- mesa-20.2.1/src/intel/vulkan/anv_formats.c 2020-10-14 17:19:10.693185600 +0000 +++ mesa-20.2.6/src/intel/vulkan/anv_formats.c 2020-12-16 21:42:03.977110600 +0000 @@ -923,6 +923,31 @@ } } + if (info->flags & VK_IMAGE_CREATE_DISJOINT_BIT) { + /* From the Vulkan 1.2.149 spec, VkImageCreateInfo: + * + * If format is a multi-planar format, and if imageCreateFormatFeatures + * (as defined in Image Creation Limits) does not contain + * VK_FORMAT_FEATURE_DISJOINT_BIT, then flags must not contain + * VK_IMAGE_CREATE_DISJOINT_BIT. + */ + if (format->n_planes > 1 && + !(format_feature_flags & VK_FORMAT_FEATURE_DISJOINT_BIT)) { + goto unsupported; + } + + /* From the Vulkan 1.2.149 spec, VkImageCreateInfo: + * + * If format is not a multi-planar format, and flags does not include + * VK_IMAGE_CREATE_ALIAS_BIT, flags must not contain + * VK_IMAGE_CREATE_DISJOINT_BIT. + */ + if (format->n_planes == 1 && + !(info->flags & VK_IMAGE_CREATE_ALIAS_BIT)) { + goto unsupported; + } + } + if (info->usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) { /* Nothing to check. 
*/ } diff -Nru mesa-20.2.1/src/intel/vulkan/anv_gem.c mesa-20.2.6/src/intel/vulkan/anv_gem.c --- mesa-20.2.1/src/intel/vulkan/anv_gem.c 2020-10-14 17:19:10.693185600 +0000 +++ mesa-20.2.6/src/intel/vulkan/anv_gem.c 2020-12-16 21:42:03.978110600 +0000 @@ -105,7 +105,6 @@ if (ret != 0) return MAP_FAILED; - VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); return (void *)(uintptr_t) gem_mmap.addr_ptr; } @@ -116,10 +115,16 @@ anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags) { + void *map; if (device->physical->has_mmap_offset) - return anv_gem_mmap_offset(device, gem_handle, offset, size, flags); + map = anv_gem_mmap_offset(device, gem_handle, offset, size, flags); else - return anv_gem_mmap_legacy(device, gem_handle, offset, size, flags); + map = anv_gem_mmap_legacy(device, gem_handle, offset, size, flags); + + if (map != MAP_FAILED) + VG(VALGRIND_MALLOCLIKE_BLOCK(map, size, 0, 1)); + + return map; } /* This is just a wrapper around munmap, but it also notifies valgrind that @@ -128,8 +133,7 @@ void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size) { - if (!device->physical->has_mmap_offset) - VG(VALGRIND_FREELIKE_BLOCK(p, 0)); + VG(VALGRIND_FREELIKE_BLOCK(p, 0)); munmap(p, size); } @@ -422,7 +426,7 @@ { struct drm_prime_handle args = { .handle = gem_handle, - .flags = DRM_CLOEXEC, + .flags = DRM_CLOEXEC | DRM_RDWR, }; int ret = gen_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); diff -Nru mesa-20.2.1/src/intel/vulkan/anv_image.c mesa-20.2.6/src/intel/vulkan/anv_image.c --- mesa-20.2.1/src/intel/vulkan/anv_image.c 2020-10-14 17:19:10.693185600 +0000 +++ mesa-20.2.6/src/intel/vulkan/anv_image.c 2020-12-16 21:42:03.978110600 +0000 @@ -540,14 +540,14 @@ anv_get_format_plane(&device->info, image->vk_format, aspect, image->tiling); struct anv_surface *anv_surf = &image->planes[plane].surface; - const isl_surf_usage_flags_t usage = - 
choose_isl_surf_usage(image->create_flags, image->usage, - isl_extra_usage_flags, aspect); - VkImageUsageFlags plane_vk_usage = aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? image->stencil_usage : image->usage; + const isl_surf_usage_flags_t usage = + choose_isl_surf_usage(image->create_flags, plane_vk_usage, + isl_extra_usage_flags, aspect); + bool needs_shadow = anv_image_plane_needs_shadow_surface(&device->info, plane_format, diff -Nru mesa-20.2.1/src/intel/vulkan/genX_cmd_buffer.c mesa-20.2.6/src/intel/vulkan/genX_cmd_buffer.c --- mesa-20.2.1/src/intel/vulkan/genX_cmd_buffer.c 2020-10-14 17:19:10.696519100 +0000 +++ mesa-20.2.6/src/intel/vulkan/genX_cmd_buffer.c 2020-12-16 21:42:03.983110700 +0000 @@ -3348,8 +3348,14 @@ if (buffer) { uint32_t stride = dynamic_stride ? cmd_buffer->state.vertex_bindings[vb].stride : pipeline->vb[vb].stride; - uint32_t size = dynamic_size ? - cmd_buffer->state.vertex_bindings[vb].size : buffer->size; + /* From the Vulkan spec (vkCmdBindVertexBuffers2EXT): + * + * "If pname:pSizes is not NULL then pname:pSizes[i] specifies + * the bound size of the vertex buffer starting from the corresponding + * elements of pname:pBuffers[i] plus pname:pOffsets[i]." + */ + UNUSED uint32_t size = dynamic_size ? 
+ cmd_buffer->state.vertex_bindings[vb].size : buffer->size - offset; state = (struct GENX(VERTEX_BUFFER_STATE)) { .VertexBufferIndex = vb, @@ -3365,9 +3371,14 @@ .NullVertexBuffer = offset >= buffer->size, #if GEN_GEN >= 8 - .BufferSize = size - offset + .BufferSize = size, #else - .EndAddress = anv_address_add(buffer->address, size - 1), + /* XXX: to handle dynamic offset for older gens we might want + * to modify Endaddress, but there are issues when doing so: + * + * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7439 + */ + .EndAddress = anv_address_add(buffer->address, buffer->size - 1), #endif }; } else { diff -Nru mesa-20.2.1/src/loader/loader_dri3_helper.c mesa-20.2.6/src/loader/loader_dri3_helper.c --- mesa-20.2.1/src/loader/loader_dri3_helper.c 2020-10-14 17:19:10.696519100 +0000 +++ mesa-20.2.6/src/loader/loader_dri3_helper.c 2020-12-16 21:42:03.985110500 +0000 @@ -272,12 +272,45 @@ } static void -dri3_update_num_back(struct loader_dri3_drawable *draw) +dri3_update_max_num_back(struct loader_dri3_drawable *draw) { - if (draw->last_present_mode == XCB_PRESENT_COMPLETE_MODE_FLIP) - draw->num_back = 3; - else - draw->num_back = 2; + switch (draw->last_present_mode) { + case XCB_PRESENT_COMPLETE_MODE_FLIP: { + int new_max; + + if (draw->swap_interval == 0) + new_max = 4; + else + new_max = 3; + + assert(new_max <= LOADER_DRI3_MAX_BACK); + + if (new_max != draw->max_num_back) { + /* On transition from swap interval == 0 to != 0, start with two + * buffers again. Otherwise keep the current number of buffers. Either + * way, more will be allocated if needed. 
+ */ + if (new_max < draw->max_num_back) + draw->cur_num_back = 2; + + draw->max_num_back = new_max; + } + + break; + } + + case XCB_PRESENT_COMPLETE_MODE_SKIP: + break; + + default: + /* On transition from flips to copies, start with a single buffer again, + * a second one will be allocated if needed + */ + if (draw->max_num_back != 2) + draw->cur_num_back = 1; + + draw->max_num_back = 2; + } } void @@ -395,7 +428,7 @@ } draw->swap_interval = swap_interval; - dri3_update_num_back(draw); + dri3_update_max_num_back(draw); /* Create a new drawable */ draw->dri_drawable = @@ -643,6 +676,7 @@ { int b; int num_to_consider; + int max_num; mtx_lock(&draw->mtx); /* Increase the likelyhood of reusing current buffer */ @@ -651,15 +685,18 @@ /* Check whether we need to reuse the current back buffer as new back. * In that case, wait until it's not busy anymore. */ - num_to_consider = draw->num_back; if (!loader_dri3_have_image_blit(draw) && draw->cur_blit_source != -1) { num_to_consider = 1; + max_num = 1; draw->cur_blit_source = -1; + } else { + num_to_consider = draw->cur_num_back; + max_num = draw->max_num_back; } for (;;) { for (b = 0; b < num_to_consider; b++) { - int id = LOADER_DRI3_BACK_ID((b + draw->cur_back) % draw->num_back); + int id = LOADER_DRI3_BACK_ID((b + draw->cur_back) % draw->cur_num_back); struct loader_dri3_buffer *buffer = draw->buffers[id]; if (!buffer || !buffer->busy) { @@ -668,7 +705,10 @@ return id; } } - if (!dri3_wait_for_event_locked(draw, NULL)) { + + if (num_to_consider < max_num) { + num_to_consider = ++draw->cur_num_back; + } else if (!dri3_wait_for_event_locked(draw, NULL)) { mtx_unlock(&draw->mtx); return -1; } @@ -2006,10 +2046,10 @@ if (!dri3_update_drawable(draw)) return false; - dri3_update_num_back(draw); + dri3_update_max_num_back(draw); /* Free no longer needed back buffers */ - for (buf_id = draw->num_back; buf_id < LOADER_DRI3_MAX_BACK; buf_id++) { + for (buf_id = draw->cur_num_back; buf_id < LOADER_DRI3_MAX_BACK; buf_id++) { if 
(draw->cur_blit_source != buf_id && draw->buffers[buf_id]) { dri3_free_render_buffer(draw, draw->buffers[buf_id]); draw->buffers[buf_id] = NULL; diff -Nru mesa-20.2.1/src/loader/loader_dri3_helper.h mesa-20.2.6/src/loader/loader_dri3_helper.h --- mesa-20.2.1/src/loader/loader_dri3_helper.h 2020-10-14 17:19:10.696519100 +0000 +++ mesa-20.2.6/src/loader/loader_dri3_helper.h 2020-12-16 21:42:03.985110500 +0000 @@ -146,7 +146,8 @@ struct loader_dri3_buffer *buffers[LOADER_DRI3_NUM_BUFFERS]; int cur_back; - int num_back; + int cur_num_back; + int max_num_back; int cur_blit_source; uint32_t *stamp; diff -Nru mesa-20.2.1/src/mesa/drivers/dri/i965/brw_bufmgr.c mesa-20.2.6/src/mesa/drivers/dri/i965/brw_bufmgr.c --- mesa-20.2.1/src/mesa/drivers/dri/i965/brw_bufmgr.c 2020-10-14 17:19:10.726519300 +0000 +++ mesa-20.2.6/src/mesa/drivers/dri/i965/brw_bufmgr.c 2020-12-16 21:42:04.012110700 +0000 @@ -1947,7 +1947,8 @@ } bufmgr = brw_bufmgr_create(devinfo, fd, bo_reuse); - list_addtail(&bufmgr->link, &global_bufmgr_list); + if (bufmgr) + list_addtail(&bufmgr->link, &global_bufmgr_list); unlock: mtx_unlock(&global_bufmgr_list_mutex); diff -Nru mesa-20.2.1/src/mesa/drivers/dri/i965/intel_screen.c mesa-20.2.6/src/mesa/drivers/dri/i965/intel_screen.c --- mesa-20.2.1/src/mesa/drivers/dri/i965/intel_screen.c 2020-10-14 17:19:10.736519300 +0000 +++ mesa-20.2.6/src/mesa/drivers/dri/i965/intel_screen.c 2020-12-16 21:42:04.023110600 +0000 @@ -84,6 +84,7 @@ DRI_CONF_FORCE_GLSL_VERSION(0) DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false") DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false") + DRI_CONF_ALLOW_EXTRA_PP_TOKENS("false") DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION("false") DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false") DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION("false") diff -Nru mesa-20.2.1/src/mesa/main/clear.c mesa-20.2.6/src/mesa/main/clear.c --- mesa-20.2.1/src/mesa/main/clear.c 2020-10-14 17:19:10.756519600 +0000 +++ mesa-20.2.6/src/mesa/main/clear.c 2020-12-16 
21:42:04.040110600 +0000 @@ -30,6 +30,7 @@ +#include "glformats.h" #include "glheader.h" #include "clear.h" #include "context.h" @@ -589,7 +590,20 @@ * hook instead. */ const GLclampd clearSave = ctx->Depth.Clear; - ctx->Depth.Clear = *value; + + /* Page 263 (page 279 of the PDF) of the OpenGL 3.0 spec says: + * + * "If buffer is DEPTH, drawbuffer must be zero, and value points + * to the single depth value to clear the depth buffer to. + * Clamping and type conversion for fixed-point depth buffers are + * performed in the same fashion as for ClearDepth." + */ + const struct gl_renderbuffer *rb = + ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; + const bool is_float_depth = + _mesa_has_depth_float_channel(rb->InternalFormat); + ctx->Depth.Clear = is_float_depth ? *value : SATURATE(*value); + ctx->Driver.Clear(ctx, BUFFER_BIT_DEPTH); ctx->Depth.Clear = clearSave; } @@ -724,8 +738,20 @@ const GLclampd clearDepthSave = ctx->Depth.Clear; const GLuint clearStencilSave = ctx->Stencil.Clear; - /* set new clear values */ - ctx->Depth.Clear = depth; + /* set new clear values + * + * Page 263 (page 279 of the PDF) of the OpenGL 3.0 spec says: + * + * "depth and stencil are the values to clear the depth and stencil + * buffers to, respectively. Clamping and type conversion for + * fixed-point depth buffers are performed in the same fashion as + * for ClearDepth." + */ + const struct gl_renderbuffer *rb = + ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; + const bool has_float_depth = rb && + _mesa_has_depth_float_channel(rb->InternalFormat); + ctx->Depth.Clear = has_float_depth ? 
depth : SATURATE(depth); ctx->Stencil.Clear = stencil; /* clear buffers */ diff -Nru mesa-20.2.1/src/mesa/main/get.c mesa-20.2.6/src/mesa/main/get.c --- mesa-20.2.1/src/mesa/main/get.c 2020-10-14 17:19:10.766519500 +0000 +++ mesa-20.2.6/src/mesa/main/get.c 2020-12-16 21:42:04.050110600 +0000 @@ -1704,6 +1704,7 @@ case TYPE_MATRIX_T: return sizeof (GLfloat) * 16; default: + assert(!"invalid value_type given for get_value_size()"); return -1; } } @@ -2349,9 +2350,6 @@ d = find_value(func, pname, &p, &v); size = get_value_size(d->type, &v); - if (size <= 0) { - _mesa_problem(ctx, "invalid value type in GetUnsignedBytevEXT()"); - } switch (d->type) { case TYPE_BIT_0: @@ -3262,9 +3260,6 @@ type = find_value_indexed(func, target, index, &v); size = get_value_size(type, &v); - if (size <= 0) { - _mesa_problem(ctx, "invalid value type in GetUnsignedBytei_vEXT()"); - } switch (type) { case TYPE_UINT: diff -Nru mesa-20.2.1/src/mesa/main/glformats.c mesa-20.2.6/src/mesa/main/glformats.c --- mesa-20.2.1/src/mesa/main/glformats.c 2020-10-14 17:19:10.766519500 +0000 +++ mesa-20.2.6/src/mesa/main/glformats.c 2020-12-16 21:42:04.051110700 +0000 @@ -1335,6 +1335,15 @@ } } +/** + * Test if the given image format has a floating-point depth component. + */ +GLboolean +_mesa_has_depth_float_channel(GLenum internalFormat) +{ + return internalFormat == GL_DEPTH32F_STENCIL8 || + internalFormat == GL_DEPTH_COMPONENT32F; +} /** * Test if an image format is a supported compressed format. 
diff -Nru mesa-20.2.1/src/mesa/main/glformats.h mesa-20.2.6/src/mesa/main/glformats.h --- mesa-20.2.1/src/mesa/main/glformats.h 2020-10-14 17:19:10.766519500 +0000 +++ mesa-20.2.6/src/mesa/main/glformats.h 2020-12-16 21:42:04.051110700 +0000 @@ -29,6 +29,7 @@ #include +#include #include @@ -105,6 +106,9 @@ _mesa_is_depth_or_stencil_format(GLenum format); extern GLboolean +_mesa_has_depth_float_channel(GLenum internalFormat); + +extern GLboolean _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format); extern GLboolean diff -Nru mesa-20.2.1/src/mesa/main/glspirv.c mesa-20.2.6/src/mesa/main/glspirv.c --- mesa-20.2.1/src/mesa/main/glspirv.c 2020-10-14 17:19:10.766519500 +0000 +++ mesa-20.2.6/src/mesa/main/glspirv.c 2020-12-16 21:42:04.051110700 +0000 @@ -295,6 +295,13 @@ } assert(exec_list_length(&nir->functions) == 1); + /* Now that we've deleted all but the main function, we can go ahead and + * lower the rest of the constant initializers. We do this here so that + * nir_remove_dead_variables and split_per_member_structs below see the + * corresponding stores. + */ + NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); + /* Split member structs. We do this before lower_io_to_temporaries so that * it doesn't lower system values to temporaries by accident. */ diff -Nru mesa-20.2.1/src/mesa/main/mtypes.h mesa-20.2.6/src/mesa/main/mtypes.h --- mesa-20.2.1/src/mesa/main/mtypes.h 2020-10-14 17:19:10.769852900 +0000 +++ mesa-20.2.6/src/mesa/main/mtypes.h 2020-12-16 21:42:04.054110800 +0000 @@ -3876,6 +3876,14 @@ GLboolean AllowLayoutQualifiersOnFunctionParameters; /** + * Allow extra tokens at end of preprocessor directives. The CTS now tests + * to make sure these are not allowed. However, previously drivers would + * allow them to exist and just issue a warning so some old applications + * depend on this. 
+ */ + GLboolean AllowExtraPPTokens; + + /** * Force computing the absolute value for sqrt() and inversesqrt() to follow * D3D9 when apps rely on this behaviour. */ diff -Nru mesa-20.2.1/src/mesa/main/readpix.c mesa-20.2.6/src/mesa/main/readpix.c --- mesa-20.2.1/src/mesa/main/readpix.c 2020-10-14 17:19:10.773186200 +0000 +++ mesa-20.2.6/src/mesa/main/readpix.c 2020-12-16 21:42:04.057110500 +0000 @@ -910,8 +910,7 @@ const GLenum data_type = _mesa_get_format_datatype(rb->Format); GLboolean is_unsigned_int = GL_FALSE; GLboolean is_signed_int = GL_FALSE; - GLboolean is_float_depth = (internalFormat == GL_DEPTH_COMPONENT32F) || - (internalFormat == GL_DEPTH32F_STENCIL8); + GLboolean is_float_depth = _mesa_has_depth_float_channel(internalFormat); is_unsigned_int = _mesa_is_enum_format_unsigned_int(internalFormat); if (!is_unsigned_int) { diff -Nru mesa-20.2.1/src/mesa/main/uniform_query.cpp mesa-20.2.6/src/mesa/main/uniform_query.cpp --- mesa-20.2.1/src/mesa/main/uniform_query.cpp 2020-10-14 17:19:10.783186400 +0000 +++ mesa-20.2.6/src/mesa/main/uniform_query.cpp 2020-12-16 21:42:04.066110600 +0000 @@ -1181,6 +1181,10 @@ /* Mark this bindless sampler as bound to a texture unit. 
*/ if (sampler->unit != value || !sampler->bound) { + if (!flushed) { + FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT | _NEW_PROGRAM); + flushed = true; + } sampler->unit = value; changed = true; } @@ -1188,6 +1192,10 @@ sh->Program->sh.HasBoundBindlessSampler = true; } else { if (sh->Program->SamplerUnits[unit] != value) { + if (!flushed) { + FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT | _NEW_PROGRAM); + flushed = true; + } sh->Program->SamplerUnits[unit] = value; changed = true; } @@ -1195,11 +1203,6 @@ } if (changed) { - if (!flushed) { - FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT | _NEW_PROGRAM); - flushed = true; - } - struct gl_program *const prog = sh->Program; _mesa_update_shader_textures_used(shProg, prog); if (ctx->Driver.SamplerUniformChange) diff -Nru mesa-20.2.1/src/mesa/main/varray.c mesa-20.2.6/src/mesa/main/varray.c --- mesa-20.2.1/src/mesa/main/varray.c 2020-10-14 17:19:10.783186400 +0000 +++ mesa-20.2.6/src/mesa/main/varray.c 2020-12-16 21:42:04.066110600 +0000 @@ -459,8 +459,7 @@ */ static enum pipe_format vertex_format_to_pipe_format(GLubyte size, GLenum16 type, GLenum16 format, - GLboolean normalized, GLboolean integer, - GLboolean doubles) + bool normalized, bool integer, bool doubles) { assert(size >= 1 && size <= 4); assert(format == GL_RGBA || format == GL_BGRA); @@ -674,8 +673,8 @@ struct gl_vertex_array_object *vao, GLuint attrib, GLbitfield legalTypesMask, GLint sizeMin, GLint sizeMax, - GLint size, GLenum type, GLboolean normalized, - GLboolean integer, GLboolean doubles, + GLint size, GLenum type, bool normalized, + bool integer, bool doubles, GLuint relativeOffset, GLenum format) { GLbitfield typeBit; diff -Nru mesa-20.2.1/src/mesa/state_tracker/st_cb_eglimage.c mesa-20.2.6/src/mesa/state_tracker/st_cb_eglimage.c --- mesa-20.2.1/src/mesa/state_tracker/st_cb_eglimage.c 2020-10-14 17:19:10.793186400 +0000 +++ mesa-20.2.6/src/mesa/state_tracker/st_cb_eglimage.c 2020-12-16 21:42:04.077110800 +0000 @@ -327,7 +327,9 @@ &native_supported)) return; - 
st_bind_egl_image(ctx, texObj, texImage, &stimg, false, native_supported); + st_bind_egl_image(ctx, texObj, texImage, &stimg, + target != GL_TEXTURE_EXTERNAL_OES, + native_supported); pipe_resource_reference(&stimg.texture, NULL); } diff -Nru mesa-20.2.1/src/mesa/state_tracker/st_cb_memoryobjects.c mesa-20.2.6/src/mesa/state_tracker/st_cb_memoryobjects.c --- mesa-20.2.1/src/mesa/state_tracker/st_cb_memoryobjects.c 2020-10-14 17:19:10.793186400 +0000 +++ mesa-20.2.6/src/mesa/state_tracker/st_cb_memoryobjects.c 2020-12-16 21:42:04.077110800 +0000 @@ -59,7 +59,8 @@ struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = pipe->screen; - screen->memobj_destroy(screen, st_obj->memory); + if (st_obj->memory) + screen->memobj_destroy(screen, st_obj->memory); _mesa_delete_memory_object(ctx, obj); } diff -Nru mesa-20.2.1/src/mesa/state_tracker/st_extensions.c mesa-20.2.6/src/mesa/state_tracker/st_extensions.c --- mesa-20.2.1/src/mesa/state_tracker/st_extensions.c 2020-10-14 17:19:10.796519800 +0000 +++ mesa-20.2.6/src/mesa/state_tracker/st_extensions.c 2020-12-16 21:42:04.083110600 +0000 @@ -1124,6 +1124,8 @@ consts->ForceGLSLVersion = options->force_glsl_version; } + consts->AllowExtraPPTokens = options->allow_extra_pp_tokens; + consts->AllowHigherCompatVersion = options->allow_higher_compat_version; consts->ForceGLSLAbsSqrt = options->force_glsl_abs_sqrt; diff -Nru mesa-20.2.1/src/mesa/state_tracker/st_format.c mesa-20.2.6/src/mesa/state_tracker/st_format.c --- mesa-20.2.1/src/mesa/state_tracker/st_format.c 2020-10-14 17:19:10.796519800 +0000 +++ mesa-20.2.6/src/mesa/state_tracker/st_format.c 2020-12-16 21:42:04.083110600 +0000 @@ -234,19 +234,19 @@ DEFAULT_RGB_FORMATS } }, { - { GL_RGB4 }, + { GL_RGB4, 0 }, { PIPE_FORMAT_B4G4R4X4_UNORM, PIPE_FORMAT_B4G4R4A4_UNORM, PIPE_FORMAT_A4B4G4R4_UNORM, DEFAULT_RGB_FORMATS } }, { - { GL_RGB5 }, + { GL_RGB5, 0 }, { PIPE_FORMAT_B5G5R5X1_UNORM, PIPE_FORMAT_X1B5G5R5_UNORM, PIPE_FORMAT_B5G5R5A1_UNORM, 
PIPE_FORMAT_A1B5G5R5_UNORM, DEFAULT_RGB_FORMATS } }, { - { GL_RGB565 }, + { GL_RGB565, 0 }, { PIPE_FORMAT_B5G6R5_UNORM, DEFAULT_RGB_FORMATS } }, diff -Nru mesa-20.2.1/src/mesa/state_tracker/st_pbo.c mesa-20.2.6/src/mesa/state_tracker/st_pbo.c --- mesa-20.2.1/src/mesa/state_tracker/st_pbo.c 2020-10-14 17:19:10.796519800 +0000 +++ mesa-20.2.6/src/mesa/state_tracker/st_pbo.c 2020-12-16 21:42:04.086110600 +0000 @@ -202,7 +202,7 @@ return false; } - if (addr->depth != 1 && st->pbo.use_gs && !st->pbo.gs) { + if (st->pbo.use_gs && !st->pbo.gs) { st->pbo.gs = st_pbo_create_gs(st); if (!st->pbo.gs) return false; diff -Nru mesa-20.2.1/src/util/00-mesa-defaults.conf mesa-20.2.6/src/util/00-mesa-defaults.conf --- mesa-20.2.1/src/util/00-mesa-defaults.conf 2020-10-14 17:19:10.819853300 +0000 +++ mesa-20.2.6/src/util/00-mesa-defaults.conf 2020-12-16 21:42:04.110110800 +0000 @@ -246,6 +246,10 @@