diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,47 @@ +define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline { +entry: + %0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst + ret i64 %0 +} + +define i64 @__clc__sync_fetch_and_umin_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline { +entry: + %0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst + ret i64 %0 +} + +define i64 @__clc__sync_fetch_and_min_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline { +entry: + %0 = atomicrmw volatile min i64 addrspace(3)* %ptr, i64 %value seq_cst + ret i64 %0 +} + +define i64 @__clc__sync_fetch_and_umin_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline { +entry: + %0 = atomicrmw volatile umin i64 addrspace(3)* %ptr, i64 %value seq_cst + ret i64 %0 +} + +define i64 @__clc__sync_fetch_and_max_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline { +entry: + %0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst + ret i64 %0 +} + +define i64 @__clc__sync_fetch_and_umax_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline { +entry: + %0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst + ret i64 %0 +} + +define i64 @__clc__sync_fetch_and_max_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline { +entry: + %0 = atomicrmw volatile max i64 
addrspace(3)* %ptr, i64 %value seq_cst + ret i64 %0 +} + +define i64 @__clc__sync_fetch_and_umax_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline { +entry: + %0 = atomicrmw volatile umax i64 addrspace(3)* %ptr, i64 %value seq_cst + ret i64 %0 +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/OVERRIDES_3.9 libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/OVERRIDES_3.9 --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/OVERRIDES_3.9 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/OVERRIDES_3.9 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,3 @@ +workitem/get_global_size.ll +workitem/get_local_size.ll +workitem/get_num_groups.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/SOURCES libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/SOURCES --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/SOURCES 2017-09-12 20:06:29.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/SOURCES 2017-09-25 21:47:58.000000000 +0000 @@ -1,3 +1,4 @@ +cl_khr_int64_extended_atomics/minmax_helpers.ll math/ldexp.cl mem_fence/fence.cl mem_fence/waitcnt.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/SOURCES_3.9 libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/SOURCES_3.9 --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/SOURCES_3.9 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/SOURCES_3.9 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,3 @@ +workitem/get_global_size.39.ll +workitem/get_local_size.39.ll +workitem/get_num_groups.39.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/workitem/get_global_size.39.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/workitem/get_global_size.39.ll --- 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/workitem/get_global_size.39.ll 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/workitem/get_global_size.39.ll 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,18 @@ +declare i32 @llvm.r600.read.global.size.x() nounwind readnone +declare i32 @llvm.r600.read.global.size.y() nounwind readnone +declare i32 @llvm.r600.read.global.size.z() nounwind readnone + +define i32 @get_global_size(i32 %dim) nounwind readnone alwaysinline { + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] +x_dim: + %x = call i32 @llvm.r600.read.global.size.x() + ret i32 %x +y_dim: + %y = call i32 @llvm.r600.read.global.size.y() + ret i32 %y +z_dim: + %z = call i32 @llvm.r600.read.global.size.z() + ret i32 %z +default: + ret i32 1 +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/workitem/get_local_size.39.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/workitem/get_local_size.39.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/workitem/get_local_size.39.ll 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/workitem/get_local_size.39.ll 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,18 @@ +declare i32 @llvm.r600.read.local.size.x() nounwind readnone +declare i32 @llvm.r600.read.local.size.y() nounwind readnone +declare i32 @llvm.r600.read.local.size.z() nounwind readnone + +define i32 @get_local_size(i32 %dim) nounwind readnone alwaysinline { + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] +x_dim: + %x = call i32 @llvm.r600.read.local.size.x() + ret i32 %x +y_dim: + %y = call i32 @llvm.r600.read.local.size.y() + ret i32 %y +z_dim: + %z = call i32 @llvm.r600.read.local.size.z() + ret i32 %z +default: + ret i32 1 +} diff -Nru 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/workitem/get_num_groups.39.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/workitem/get_num_groups.39.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/workitem/get_num_groups.39.ll 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/workitem/get_num_groups.39.ll 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,18 @@ +declare i32 @llvm.r600.read.ngroups.x() nounwind readnone +declare i32 @llvm.r600.read.ngroups.y() nounwind readnone +declare i32 @llvm.r600.read.ngroups.z() nounwind readnone + +define i32 @get_num_groups(i32 %dim) nounwind readnone alwaysinline { + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] +x_dim: + %x = call i32 @llvm.r600.read.ngroups.x() + ret i32 %x +y_dim: + %y = call i32 @llvm.r600.read.ngroups.y() + ret i32 %y +z_dim: + %z = call i32 @llvm.r600.read.ngroups.z() + ret i32 %z +default: + ret i32 1 +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/workitem/get_work_dim.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/workitem/get_work_dim.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn/lib/workitem/get_work_dim.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn/lib/workitem/get_work_dim.cl 2017-10-19 16:06:04.000000000 +0000 @@ -1,6 +1,6 @@ #include -_CLC_DEF uint get_work_dim() +_CLC_DEF uint get_work_dim(void) { __attribute__((address_space(2))) uint * ptr = (__attribute__((address_space(2))) uint *) diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn-amdhsa/lib/OVERRIDES_3.9 libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn-amdhsa/lib/OVERRIDES_3.9 --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn-amdhsa/lib/OVERRIDES_3.9 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn-amdhsa/lib/OVERRIDES_3.9 
2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,3 @@ +workitem/get_global_size.ll +workitem/get_local_size.ll +workitem/get_num_groups.39.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn-amdhsa/lib/SOURCES_3.9 libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn-amdhsa/lib/SOURCES_3.9 --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn-amdhsa/lib/SOURCES_3.9 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn-amdhsa/lib/SOURCES_3.9 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,2 @@ +workitem/get_global_size.39.ll +workitem/get_local_size.39.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn-amdhsa/lib/workitem/get_global_size.39.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn-amdhsa/lib/workitem/get_global_size.39.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn-amdhsa/lib/workitem/get_global_size.39.ll 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn-amdhsa/lib/workitem/get_global_size.39.ll 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,36 @@ +declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 + +define i32 @get_global_size(i32 %dim) #1 { + %dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() + switch i32 %dim, label %default [ + i32 0, label %x + i32 1, label %y + i32 2, label %z + ] + +x: + %ptr_x = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i32 12 + %ptr_x32 = bitcast i8 addrspace(2)* %ptr_x to i32 addrspace(2)* + %x32 = load i32, i32 addrspace(2)* %ptr_x32, align 4, !invariant.load !0 + ret i32 %x32 + +y: + %ptr_y = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i32 16 + %ptr_y32 = bitcast i8 addrspace(2)* %ptr_y to i32 addrspace(2)* + %y32 = load i32, i32 addrspace(2)* %ptr_y32, align 4, !invariant.load !0 + ret i32 %y32 + +z: + %ptr_z = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i32 20 + %ptr_z32 = bitcast 
i8 addrspace(2)* %ptr_z to i32 addrspace(2)* + %z32 = load i32, i32 addrspace(2)* %ptr_z32, align 4, !invariant.load !0 + ret i32 %z32 + +default: + ret i32 1 +} + +attributes #0 = { nounwind readnone } +attributes #1 = { alwaysinline norecurse nounwind readonly } + +!0 = !{} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn-amdhsa/lib/workitem/get_local_size.39.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn-amdhsa/lib/workitem/get_local_size.39.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgcn-amdhsa/lib/workitem/get_local_size.39.ll 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgcn-amdhsa/lib/workitem/get_local_size.39.ll 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,35 @@ +declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 + +define i32 @get_local_size(i32 %dim) #1 { + %dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() + %dispatch_ptr_i32 = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)* + %xy_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i32 1 + %xy_size = load i32, i32 addrspace(2)* %xy_size_ptr, align 4, !invariant.load !0 + switch i32 %dim, label %default [ + i32 0, label %x_dim + i32 1, label %y_dim + i32 2, label %z_dim + ] + +x_dim: + %x_size = and i32 %xy_size, 65535 + ret i32 %x_size + +y_dim: + %y_size = lshr i32 %xy_size, 16 + ret i32 %y_size + +z_dim: + %z_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i32 2 + %z_size = load i32, i32 addrspace(2)* %z_size_ptr, align 4, !invariant.load !0, !range !1 + ret i32 %z_size + +default: + ret i32 1 +} + +attributes #0 = { nounwind readnone } +attributes #1 = { alwaysinline norecurse nounwind readonly } + +!0 = !{} +!1 = !{ i32 0, i32 257 } diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/atomic/atomic.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/atomic/atomic.cl --- 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/atomic/atomic.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/atomic/atomic.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,65 +0,0 @@ -#include - -#define ATOMIC_FUNC_DEFINE(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ -_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE val) { \ - return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)val); \ -} - -/* For atomic functions that don't need different bitcode dependending on argument signedness */ -#define ATOMIC_FUNC_SIGN(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE); \ - ATOMIC_FUNC_DEFINE(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - ATOMIC_FUNC_DEFINE(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) - -#define ATOMIC_FUNC_ADDRSPACE(TYPE, FUNCTION) \ - ATOMIC_FUNC_SIGN(TYPE, FUNCTION, global, 1) \ - ATOMIC_FUNC_SIGN(TYPE, FUNCTION, local, 3) - -#define ATOMIC_FUNC(FUNCTION) \ - ATOMIC_FUNC_ADDRSPACE(int, FUNCTION) - -#define ATOMIC_FUNC_DEFINE_3_ARG(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ -_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE cmp, RET_SIGN TYPE val) { \ - return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)cmp, (ARG_SIGN TYPE)val); \ -} - -/* For atomic functions that don't need different bitcode dependending on argument signedness */ -#define ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed 
TYPE*, signed TYPE, signed TYPE); \ - ATOMIC_FUNC_DEFINE_3_ARG(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - ATOMIC_FUNC_DEFINE_3_ARG(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) - -#define ATOMIC_FUNC_ADDRSPACE_3_ARG(TYPE, FUNCTION) \ - ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, global, 1) \ - ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, local, 3) - -#define ATOMIC_FUNC_3_ARG(FUNCTION) \ - ATOMIC_FUNC_ADDRSPACE_3_ARG(int, FUNCTION) - -ATOMIC_FUNC(atomic_add) -ATOMIC_FUNC(atomic_and) -ATOMIC_FUNC(atomic_or) -ATOMIC_FUNC(atomic_sub) -ATOMIC_FUNC(atomic_xchg) -ATOMIC_FUNC(atomic_xor) -ATOMIC_FUNC_3_ARG(atomic_cmpxchg) - -_CLC_DECL signed int __clc_atomic_max_addr1(volatile global signed int*, signed int); -_CLC_DECL signed int __clc_atomic_max_addr3(volatile local signed int*, signed int); -_CLC_DECL uint __clc_atomic_umax_addr1(volatile global uint*, uint); -_CLC_DECL uint __clc_atomic_umax_addr3(volatile local uint*, uint); - -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, global, 1) -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, local, 3) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, global, 1) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, local, 3) - -_CLC_DECL signed int __clc_atomic_min_addr1(volatile global signed int*, signed int); -_CLC_DECL signed int __clc_atomic_min_addr3(volatile local signed int*, signed int); -_CLC_DECL uint __clc_atomic_umin_addr1(volatile global uint*, uint); -_CLC_DECL uint __clc_atomic_umin_addr3(volatile local uint*, uint); - -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, global, 1) -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, local, 3) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, global, 1) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, local, 3) diff -Nru 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_attributes_impl.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_attributes_impl.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_attributes_impl.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_attributes_impl.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,87 +0,0 @@ -%opencl.image2d_t = type opaque -%opencl.image3d_t = type opaque - -declare i32 @llvm.OpenCL.image.get.resource.id.2d( - %opencl.image2d_t addrspace(1)*) nounwind readnone -declare i32 @llvm.OpenCL.image.get.resource.id.3d( - %opencl.image3d_t addrspace(1)*) nounwind readnone - -declare [3 x i32] @llvm.OpenCL.image.get.size.2d( - %opencl.image2d_t addrspace(1)*) nounwind readnone -declare [3 x i32] @llvm.OpenCL.image.get.size.3d( - %opencl.image3d_t addrspace(1)*) nounwind readnone - -declare [2 x i32] @llvm.OpenCL.image.get.format.2d( - %opencl.image2d_t addrspace(1)*) nounwind readnone -declare [2 x i32] @llvm.OpenCL.image.get.format.3d( - %opencl.image3d_t addrspace(1)*) nounwind readnone - -define i32 @__clc_get_image_width_2d( - %opencl.image2d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d( - %opencl.image2d_t addrspace(1)* %img) - %2 = extractvalue [3 x i32] %1, 0 - ret i32 %2 -} -define i32 @__clc_get_image_width_3d( - %opencl.image3d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( - %opencl.image3d_t addrspace(1)* %img) - %2 = extractvalue [3 x i32] %1, 0 - ret i32 %2 -} - -define i32 @__clc_get_image_height_2d( - %opencl.image2d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d( - %opencl.image2d_t addrspace(1)* %img) - %2 = extractvalue [3 x i32] %1, 1 - ret i32 %2 -} -define i32 @__clc_get_image_height_3d( - %opencl.image3d_t addrspace(1)* nocapture 
%img) #0 { - %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( - %opencl.image3d_t addrspace(1)* %img) - %2 = extractvalue [3 x i32] %1, 1 - ret i32 %2 -} - -define i32 @__clc_get_image_depth_3d( - %opencl.image3d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( - %opencl.image3d_t addrspace(1)* %img) - %2 = extractvalue [3 x i32] %1, 2 - ret i32 %2 -} - -define i32 @__clc_get_image_channel_data_type_2d( - %opencl.image2d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d( - %opencl.image2d_t addrspace(1)* %img) - %2 = extractvalue [2 x i32] %1, 0 - ret i32 %2 -} -define i32 @__clc_get_image_channel_data_type_3d( - %opencl.image3d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d( - %opencl.image3d_t addrspace(1)* %img) - %2 = extractvalue [2 x i32] %1, 0 - ret i32 %2 -} - -define i32 @__clc_get_image_channel_order_2d( - %opencl.image2d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d( - %opencl.image2d_t addrspace(1)* %img) - %2 = extractvalue [2 x i32] %1, 1 - ret i32 %2 -} -define i32 @__clc_get_image_channel_order_3d( - %opencl.image3d_t addrspace(1)* nocapture %img) #0 { - %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d( - %opencl.image3d_t addrspace(1)* %img) - %2 = extractvalue [2 x i32] %1, 1 - ret i32 %2 -} - -attributes #0 = { nounwind readnone alwaysinline } diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_channel_data_type.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_channel_data_type.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_channel_data_type.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_channel_data_type.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -#include - 
-_CLC_DECL int __clc_get_image_channel_data_type_2d(image2d_t); -_CLC_DECL int __clc_get_image_channel_data_type_3d(image3d_t); - -_CLC_OVERLOAD _CLC_DEF int -get_image_channel_data_type(image2d_t image) { - return __clc_get_image_channel_data_type_2d(image); -} -_CLC_OVERLOAD _CLC_DEF int -get_image_channel_data_type(image3d_t image) { - return __clc_get_image_channel_data_type_3d(image); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_channel_order.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_channel_order.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_channel_order.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_channel_order.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -#include - -_CLC_DECL int __clc_get_image_channel_order_2d(image2d_t); -_CLC_DECL int __clc_get_image_channel_order_3d(image3d_t); - -_CLC_OVERLOAD _CLC_DEF int -get_image_channel_order(image2d_t image) { - return __clc_get_image_channel_order_2d(image); -} -_CLC_OVERLOAD _CLC_DEF int -get_image_channel_order(image3d_t image) { - return __clc_get_image_channel_order_3d(image); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_depth.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_depth.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_depth.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_depth.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -#include - -_CLC_DECL int __clc_get_image_depth_3d(image3d_t); - -_CLC_OVERLOAD _CLC_DEF int -get_image_depth(image3d_t image) { - return __clc_get_image_depth_3d(image); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_height.cl 
libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_height.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_height.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_height.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -#include - -_CLC_DECL int __clc_get_image_height_2d(image2d_t); -_CLC_DECL int __clc_get_image_height_3d(image3d_t); - -_CLC_OVERLOAD _CLC_DEF int -get_image_height(image2d_t image) { - return __clc_get_image_height_2d(image); -} -_CLC_OVERLOAD _CLC_DEF int -get_image_height(image3d_t image) { - return __clc_get_image_height_3d(image); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_width.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_width.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/get_image_width.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/get_image_width.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -#include - -_CLC_DECL int __clc_get_image_width_2d(image2d_t); -_CLC_DECL int __clc_get_image_width_3d(image3d_t); - -_CLC_OVERLOAD _CLC_DEF int -get_image_width(image2d_t image) { - return __clc_get_image_width_2d(image); -} -_CLC_OVERLOAD _CLC_DEF int -get_image_width(image3d_t image) { - return __clc_get_image_width_3d(image); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/read_imagef.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/read_imagef.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/read_imagef.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/read_imagef.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,14 +0,0 @@ -#include - -_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); - -_CLC_OVERLOAD 
_CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, - int2 coord) { - float2 coord_float = (float2)(coord.x, coord.y); - return __clc_read_imagef_tex(image, sampler, coord_float); -} - -_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, - float2 coord) { - return __clc_read_imagef_tex(image, sampler, coord); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/read_imagei.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/read_imagei.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/read_imagei.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/read_imagei.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -#include - -_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); - -int4 __clc_reinterpret_v4f_to_v4i(float4 v) { - union { - int4 v4i; - float4 v4f; - } res = { .v4f = v}; - return res.v4i; -} - -_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, - int2 coord) { - float2 coord_float = (float2)(coord.x, coord.y); - return __clc_reinterpret_v4f_to_v4i( - __clc_read_imagef_tex(image, sampler, coord_float)); -} -_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, - float2 coord) { - return __clc_reinterpret_v4f_to_v4i( - __clc_read_imagef_tex(image, sampler, coord)); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/read_image_impl.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/read_image_impl.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/read_image_impl.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/read_image_impl.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -%opencl.image2d_t = type opaque - -declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, - i32, i32, i32) 
readnone -declare i32 @llvm.OpenCL.image.get.resource.id.2d( - %opencl.image2d_t addrspace(1)*) nounwind readnone -declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) readnone - -define <4 x float> @__clc_v4f_from_v2f(<2 x float> %v) alwaysinline { - %e0 = extractelement <2 x float> %v, i32 0 - %e1 = extractelement <2 x float> %v, i32 1 - %res.0 = insertelement <4 x float> undef, float %e0, i32 0 - %res.1 = insertelement <4 x float> %res.0, float %e1, i32 1 - %res.2 = insertelement <4 x float> %res.1, float 0.0, i32 2 - %res.3 = insertelement <4 x float> %res.2, float 0.0, i32 3 - ret <4 x float> %res.3 -} - -define <4 x float> @__clc_read_imagef_tex( - %opencl.image2d_t addrspace(1)* nocapture %img, - i32 %sampler, <2 x float> %coord) alwaysinline { -entry: - %coord_v4 = call <4 x float> @__clc_v4f_from_v2f(<2 x float> %coord) - %smp_id = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %sampler) - %img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d( - %opencl.image2d_t addrspace(1)* %img) - %tex_id = add i32 %img_id, 2 ; First 2 IDs are reserved. - - %coord_norm = and i32 %sampler, 1 - %is_norm = icmp eq i32 %coord_norm, 1 - br i1 %is_norm, label %NormCoord, label %UnnormCoord -NormCoord: - %data.norm = call <4 x float> @llvm.R600.tex( - <4 x float> %coord_v4, - i32 0, i32 0, i32 0, ; Offset. - i32 2, i32 %smp_id, - i32 1, i32 1, i32 1, i32 1) ; Normalized coords. - ret <4 x float> %data.norm -UnnormCoord: - %data.unnorm = call <4 x float> @llvm.R600.tex( - <4 x float> %coord_v4, - i32 0, i32 0, i32 0, ; Offset. - i32 %tex_id, i32 %smp_id, - i32 0, i32 0, i32 0, i32 0) ; Unnormalized coords. 
- ret <4 x float> %data.unnorm -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/read_imageui.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/read_imageui.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/read_imageui.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/read_imageui.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -#include - -_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); - -uint4 __clc_reinterpret_v4f_to_v4ui(float4 v) { - union { - uint4 v4ui; - float4 v4f; - } res = { .v4f = v}; - return res.v4ui; -} - -_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, - int2 coord) { - float2 coord_float = (float2)(coord.x, coord.y); - return __clc_reinterpret_v4f_to_v4ui( - __clc_read_imagef_tex(image, sampler, coord_float)); -} -_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, - float2 coord) { - return __clc_reinterpret_v4f_to_v4ui( - __clc_read_imagef_tex(image, sampler, coord)); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/write_imagef.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/write_imagef.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/write_imagef.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/write_imagef.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -#include - -_CLC_DECL void __clc_write_imagef_2d(image2d_t image, int2 coord, float4 color); - -_CLC_OVERLOAD _CLC_DEF void -write_imagef(image2d_t image, int2 coord, float4 color) -{ - __clc_write_imagef_2d(image, coord, color); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/write_imagei.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/write_imagei.cl --- 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/write_imagei.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/write_imagei.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -#include - -_CLC_DECL void __clc_write_imagei_2d(image2d_t image, int2 coord, int4 color); - -_CLC_OVERLOAD _CLC_DEF void -write_imagei(image2d_t image, int2 coord, int4 color) -{ - __clc_write_imagei_2d(image, coord, color); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/write_image_impl.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/write_image_impl.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/write_image_impl.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/write_image_impl.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ -%opencl.image2d_t = type opaque -%opencl.image3d_t = type opaque - -declare i32 @llvm.OpenCL.image.get.resource.id.2d( - %opencl.image2d_t addrspace(1)*) nounwind readnone -declare i32 @llvm.OpenCL.image.get.resource.id.3d( - %opencl.image3d_t addrspace(1)*) nounwind readnone - -declare void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord, i32 %rat_id) - -define void @__clc_write_imageui_2d( - %opencl.image2d_t addrspace(1)* nocapture %img, - <2 x i32> %coord, <4 x i32> %color) #0 { - - ; Coordinate int2 -> int4. - %e0 = extractelement <2 x i32> %coord, i32 0 - %e1 = extractelement <2 x i32> %coord, i32 1 - %coord.0 = insertelement <4 x i32> undef, i32 %e0, i32 0 - %coord.1 = insertelement <4 x i32> %coord.0, i32 %e1, i32 1 - %coord.2 = insertelement <4 x i32> %coord.1, i32 0, i32 2 - %coord.3 = insertelement <4 x i32> %coord.2, i32 0, i32 3 - - ; Get RAT ID. - %img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d( - %opencl.image2d_t addrspace(1)* %img) - %rat_id = add i32 %img_id, 1 - - ; Call store intrinsic. 
- call void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord.3, i32 %rat_id) - ret void -} - -define void @__clc_write_imagei_2d( - %opencl.image2d_t addrspace(1)* nocapture %img, - <2 x i32> %coord, <4 x i32> %color) #0 { - call void @__clc_write_imageui_2d( - %opencl.image2d_t addrspace(1)* nocapture %img, - <2 x i32> %coord, <4 x i32> %color) - ret void -} - -define void @__clc_write_imagef_2d( - %opencl.image2d_t addrspace(1)* nocapture %img, - <2 x i32> %coord, <4 x float> %color) #0 { - %color.i32 = bitcast <4 x float> %color to <4 x i32> - call void @__clc_write_imageui_2d( - %opencl.image2d_t addrspace(1)* nocapture %img, - <2 x i32> %coord, <4 x i32> %color.i32) - ret void -} - -attributes #0 = { alwaysinline } diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/write_imageui.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/write_imageui.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/image/write_imageui.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/image/write_imageui.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -#include - -_CLC_DECL void __clc_write_imageui_2d(image2d_t image, int2 coord, uint4 color); - -_CLC_OVERLOAD _CLC_DEF void -write_imageui(image2d_t image, int2 coord, uint4 color) -{ - __clc_write_imageui_2d(image, coord, color); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/math/nextafter.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/math/nextafter.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/math/nextafter.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/math/nextafter.cl 2017-10-19 16:06:04.000000000 +0000 @@ -1,5 +1,6 @@ #include #include "../lib/clcmacro.h" +#include _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float) diff -Nru 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/SOURCES libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/SOURCES --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/SOURCES 2017-09-12 20:06:29.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/SOURCES 2017-10-19 16:06:04.000000000 +0000 @@ -1,17 +1,2 @@ -atomic/atomic.cl math/nextafter.cl math/sqrt.cl -image/get_image_width.cl -image/get_image_height.cl -image/get_image_depth.cl -image/get_image_channel_data_type.cl -image/get_image_channel_order.cl -image/get_image_attributes_impl.ll -image/read_imagef.cl -image/read_imagei.cl -image/read_imageui.cl -image/read_image_impl.ll -image/write_imagef.cl -image/write_imagei.cl -image/write_imageui.cl -image/write_image_impl.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/SOURCES_3.9 libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/SOURCES_3.9 --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/amdgpu/lib/SOURCES_3.9 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/amdgpu/lib/SOURCES_3.9 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,2 @@ +shared/vload_half_helpers.ll +shared/vstore_half_helpers.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/check_external_calls.sh libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/check_external_calls.sh --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/check_external_calls.sh 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/check_external_calls.sh 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,36 @@ +#!/bin/sh + +FILE=$1 +if [ ! -f $FILE ]; then + echo "ERROR: Not a file: $FILE" + exit 3 +fi +ret=0 +if [ "x$LLVM_CONFIG" = "x" ]; then + LLVM_CONFIG=llvm-config + echo 'WARNING: $LLVM_CONFIG not set, falling back to $PATH llvm-config' + ret=2 +fi + + +BIN_DIR=$($LLVM_CONFIG --bindir) +DIS="$BIN_DIR/llvm-dis" +if [ ! 
-x $DIS ]; then + echo "ERROR: Disassembler '$DIS' is not executable" + exit 3 +fi + +TMP_FILE=$(mktemp) + +# Check for calls. Calls to llvm intrinsics are OK +$DIS < $FILE | grep ' call ' | grep -v '@llvm' > "$TMP_FILE" +COUNT=$(wc -l < "$TMP_FILE") + +if [ "$COUNT" -ne "0" ]; then + echo "ERROR: $COUNT unresolved calls detected in $FILE" + cat $TMP_FILE + ret=1 +else + echo "File $FILE is OK" +fi +exit $ret diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/configure.py libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/configure.py --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/configure.py 2017-09-12 20:06:29.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/configure.py 2017-10-19 16:06:04.000000000 +0000 @@ -72,8 +72,8 @@ llvm_int_version = int(llvm_version[0]) * 100 + int(llvm_version[1]) * 10 llvm_string_version = llvm_version[0] + '.' + llvm_version[1] -if llvm_int_version < 400: - print("libclc requires LLVM >= 4.0") +if llvm_int_version < 390: + print("libclc requires LLVM >= 3.9") sys.exit(1) llvm_system_libs = llvm_config(['--system-libs']) @@ -81,7 +81,8 @@ llvm_core_libs = llvm_config(['--libs', 'core', 'bitreader', 'bitwriter']) + ' ' + \ llvm_system_libs + ' ' + \ llvm_config(['--ldflags']) -llvm_cxxflags = llvm_config(['--cxxflags']) + ' -fno-exceptions -fno-rtti' +llvm_cxxflags = llvm_config(['--cxxflags']) + ' -fno-exceptions -fno-rtti ' + \ + '-DHAVE_LLVM=0x{:0=4}'.format(llvm_int_version) llvm_libdir = llvm_config(['--libdir']) llvm_clang = os.path.join(llvm_bindir, 'clang') @@ -108,9 +109,13 @@ 'nvptx64--nvidiacl' : { 'devices' : [{'gpu' : '', 'aliases' : []} ]}, } -available_targets['amdgcn-mesa-mesa3d'] = available_targets['amdgcn--'] -default_targets = ['nvptx--nvidiacl', 'nvptx64--nvidiacl', 'r600--', 'amdgcn--', 'amdgcn--amdhsa', 'amdgcn-mesa-mesa3d'] +default_targets = ['nvptx--nvidiacl', 'nvptx64--nvidiacl', 'r600--', 'amdgcn--', 'amdgcn--amdhsa'] + +#mesa is using amdgcn-mesa-mesa3d since llvm-4.0 +if 
llvm_int_version > 390: + available_targets['amdgcn-mesa-mesa3d'] = available_targets['amdgcn--'] + default_targets.append('amdgcn-mesa-mesa3d') targets = args if not targets: @@ -180,7 +185,8 @@ incdirs = filter(os.path.isdir, [os.path.join(srcdir, subdir, 'include') for subdir in subdirs]) - libdirs = filter(lambda d: os.path.isfile(os.path.join(d, 'SOURCES')), + libdirs = filter(lambda d: os.path.isfile(os.path.join(d, 'SOURCES')) or + os.path.isfile(os.path.join(d, 'SOURCES_' + llvm_string_version)), [os.path.join(srcdir, subdir, 'lib') for subdir in subdirs]) # The above are iterables in python3 but we might use them multiple times @@ -213,24 +219,35 @@ for libdir in libdirs: subdir_list_file = os.path.join(libdir, 'SOURCES') - manifest_deps.add(subdir_list_file) + if os.path.exists(subdir_list_file): + manifest_deps.add(subdir_list_file) override_list_file = os.path.join(libdir, 'OVERRIDES') compat_list_file = os.path.join(libdir, 'SOURCES_' + llvm_string_version) + compat_list_override = os.path.join(libdir, + 'OVERRIDES_' + llvm_string_version) # Build compat list if os.path.exists(compat_list_file): + manifest_deps.add(compat_list_file) for compat in open(compat_list_file).readlines(): compat = compat.rstrip() compats.append(compat) + # Add target compat overrides + if os.path.exists(compat_list_override): + for override in open(compat_list_override).readlines(): + override = override.rstrip() + sources_seen.add(override) + # Add target overrides if os.path.exists(override_list_file): for override in open(override_list_file).readlines(): override = override.rstrip() sources_seen.add(override) - for src in open(subdir_list_file).readlines() + compats: + files = open(subdir_list_file).readlines() if os.path.exists(subdir_list_file) else [] + for src in files + compats: src = src.rstrip() if src not in sources_seen: sources_seen.add(src) diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/debian/changelog 
libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/debian/changelog --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/debian/changelog 2017-09-12 20:08:05.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/debian/changelog 2017-10-19 16:08:12.000000000 +0000 @@ -1,9 +1,9 @@ -libclc (0.2.0+git20170912.1707.3ab9165~x~padoka0) xenial; urgency=medium +libclc (0.2.0+git20171019.1407.b61116b~x~padoka0) xenial; urgency=medium * Checkout from master git branch up to commit - 3ab9165319082ab7b847615ddba8774f3861bd98 + b61116b8475675064e3c9fbf65a5384c5a3f9485 - -- Paulo Dias Tue, 12 Sep 2017 17:08:05 -0300 + -- Paulo Dias Thu, 19 Oct 2017 14:08:12 -0200 libclc (0.2.0+git20150813-2) unstable; urgency=medium diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/clc.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/clc.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/clc.h 2017-09-12 20:06:29.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/clc.h 2017-10-19 16:06:04.000000000 +0000 @@ -106,6 +106,7 @@ #include #include #include +#include #include #include #include @@ -236,6 +237,25 @@ #include #include +/* cl_khr_int64_base_atomics Extension Functions */ +#ifdef cl_khr_int64_base_atomics +#include +#include +#include +#include +#include +#include +#endif + +/* cl_khr_int64_extended_atomics Extension Functions */ +#ifdef cl_khr_int64_base_atomics +#include +#include +#include +#include +#include +#endif + /* 6.12.12 Miscellaneous Vector Functions */ #include #include @@ -244,9 +264,4 @@ #include #include -/* libclc internal defintions */ -#ifdef __CLC_INTERNAL -#include -#endif - #pragma OPENCL EXTENSION all : disable diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/clcmacros.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/clcmacros.h --- 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/clcmacros.h 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/clcmacros.h 2017-10-19 16:06:04.000000000 +0000 @@ -9,7 +9,7 @@ #define CLC_VERSION_1_2 120 #endif -#define NULL ((void*)NULL) +#define NULL ((void*)0) #define __kernel_exec(X, typen) __kernel \ __attribute__((work_group_size_hint(X, 1, 1))) \ diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_add.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_add.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_add.h 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_add.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DECL long atom_add(volatile global long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_add(volatile global unsigned long *p, unsigned long val); +_CLC_OVERLOAD _CLC_DECL long atom_add(volatile local long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_add(volatile local unsigned long *p, unsigned long val); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_cmpxchg.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_cmpxchg.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_cmpxchg.h 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_cmpxchg.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DECL long atom_cmpxchg(volatile global long *p, long cmp, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long 
atom_cmpxchg(volatile global unsigned long *p, unsigned long cmp, unsigned long val); +_CLC_OVERLOAD _CLC_DECL long atom_cmpxchg(volatile local long *p, long cmp, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_cmpxchg(volatile local unsigned long *p, unsigned long cmp, unsigned long val); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_dec.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_dec.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_dec.h 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_dec.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DECL long atom_dec(volatile global long *p); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_dec(volatile global unsigned long *p); +_CLC_OVERLOAD _CLC_DECL long atom_dec(volatile local long *p); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_dec(volatile local unsigned long *p); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_inc.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_inc.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_inc.h 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_inc.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DECL long atom_inc(volatile global long *p); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_inc(volatile global unsigned long *p); +_CLC_OVERLOAD _CLC_DECL long atom_inc(volatile local long *p); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_inc(volatile local unsigned long *p); diff -Nru 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DECL long atom_sub(volatile global long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_sub(volatile global unsigned long *p, unsigned long val); +_CLC_OVERLOAD _CLC_DECL long atom_sub(volatile local long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_sub(volatile local unsigned long *p, unsigned long val); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DECL long atom_xchg(volatile global long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_xchg(volatile global unsigned long *p, unsigned long val); +_CLC_OVERLOAD _CLC_DECL long atom_xchg(volatile local long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_xchg(volatile local unsigned long *p, unsigned long val); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h --- 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DECL long atom_and(volatile global long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_and(volatile global unsigned long *p, unsigned long val); +_CLC_OVERLOAD _CLC_DECL long atom_and(volatile local long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_and(volatile local unsigned long *p, unsigned long val); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DECL long atom_max(volatile global long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_max(volatile global unsigned long *p, unsigned long val); +_CLC_OVERLOAD _CLC_DECL long atom_max(volatile local long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_max(volatile local unsigned long *p, unsigned long val); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h 1970-01-01 00:00:00.000000000 +0000 +++ 
libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DECL long atom_min(volatile global long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_min(volatile global unsigned long *p, unsigned long val); +_CLC_OVERLOAD _CLC_DECL long atom_min(volatile local long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_min(volatile local unsigned long *p, unsigned long val); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DECL long atom_or(volatile global long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_or(volatile global unsigned long *p, unsigned long val); +_CLC_OVERLOAD _CLC_DECL long atom_or(volatile local long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_or(volatile local unsigned long *p, unsigned long val); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,4 @@ +_CLC_OVERLOAD _CLC_DECL long atom_xor(volatile global 
long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_xor(volatile global unsigned long *p, unsigned long val); +_CLC_OVERLOAD _CLC_DECL long atom_xor(volatile local long *p, long val); +_CLC_OVERLOAD _CLC_DECL unsigned long atom_xor(volatile local unsigned long *p, unsigned long val); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/geometric/floatn.inc libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/geometric/floatn.inc --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/geometric/floatn.inc 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/geometric/floatn.inc 2017-10-19 16:06:04.000000000 +0000 @@ -19,14 +19,6 @@ #include __CLC_BODY #undef __CLC_FLOATN -#define __CLC_FLOATN float8 -#include __CLC_BODY -#undef __CLC_FLOATN - -#define __CLC_FLOATN float16 -#include __CLC_BODY -#undef __CLC_FLOATN - #undef __CLC_FLOAT #undef __CLC_FPSIZE @@ -54,14 +46,6 @@ #include __CLC_BODY #undef __CLC_FLOATN -#define __CLC_FLOATN double8 -#include __CLC_BODY -#undef __CLC_FLOATN - -#define __CLC_FLOATN double16 -#include __CLC_BODY -#undef __CLC_FLOATN - #undef __CLC_FLOAT #undef __CLC_FPSIZE diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/binary_decl.inc libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/binary_decl.inc --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/binary_decl.inc 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/binary_decl.inc 2017-10-19 16:06:04.000000000 +0000 @@ -1,6 +1,2 @@ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b); -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, float b); - -#ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, double b); -#endif +_CLC_OVERLOAD 
_CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/clc_nextafter.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/clc_nextafter.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/clc_nextafter.h 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/clc_nextafter.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -#define __CLC_BODY - -#define __CLC_FUNCTION nextafter -#include -#undef __CLC_FUNCTION - -#define __CLC_FUNCTION __clc_nextafter -#include -#undef __CLC_FUNCTION - -#undef __CLC_BODY diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/gentype.inc libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/gentype.inc --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/gentype.inc 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/gentype.inc 2017-10-19 16:06:04.000000000 +0000 @@ -54,6 +54,8 @@ #ifndef __FLOAT_ONLY #ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + #define __CLC_SCALAR_GENTYPE double #define __CLC_FPSIZE 64 diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/native_recip.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/native_recip.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/native_recip.h 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/native_recip.h 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1 @@ +#define native_recip(x) ((1) / (x)) diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/nextafter.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/nextafter.h 
--- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/nextafter.h 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/nextafter.h 2017-10-19 16:06:04.000000000 +0000 @@ -1,5 +1,2 @@ -#define __CLC_BODY -#define __CLC_FUNCTION nextafter +#define __CLC_BODY #include -#undef __CLC_FUNCTION -#undef __CLC_BODY diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/nextafter.inc libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/nextafter.inc --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/math/nextafter.inc 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/math/nextafter.inc 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE nextafter(__CLC_GENTYPE a, __CLC_GENTYPE b); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/workitem/get_work_dim.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/workitem/get_work_dim.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/clc/workitem/get_work_dim.h 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/clc/workitem/get_work_dim.h 2017-10-19 16:06:04.000000000 +0000 @@ -1 +1 @@ -_CLC_DECL uint get_work_dim(); +_CLC_DECL uint get_work_dim(void); diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/math/clc_ldexp.h libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/math/clc_ldexp.h --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/include/math/clc_ldexp.h 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/include/math/clc_ldexp.h 2017-10-19 16:06:04.000000000 +0000 @@ -2,5 +2,5 @@ #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable - _CLC_DEF _CLC_OVERLOAD float 
__clc_ldexp(double, int); + _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int); #endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_add.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_add.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_add.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_add.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,12 @@ +#include + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_add(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_and.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_and.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_and.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_and.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,12 @@ +#include + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_and(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_cmpxchg.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_cmpxchg.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_cmpxchg.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_cmpxchg.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,12 @@ +#include + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE 
atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, TYPE val) { \ + return __sync_val_compare_and_swap(p, cmp, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_impl.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_impl.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_impl.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_impl.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,133 +0,0 @@ -define i32 @__clc_atomic_add_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_add_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile add i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_and_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_and_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile and i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_cmpxchg_addr1(i32 addrspace(1)* nocapture %ptr, i32 %compare, i32 %value) nounwind alwaysinline { -entry: - %0 = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %compare, i32 %value seq_cst seq_cst - %1 = extractvalue { i32, i1 } %0, 0 - ret i32 %1 -} - -define i32 @__clc_atomic_cmpxchg_addr3(i32 addrspace(3)* nocapture %ptr, i32 %compare, i32 %value) nounwind alwaysinline { -entry: - %0 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 %compare, i32 %value seq_cst seq_cst - %1 = extractvalue { 
i32, i1 } %0, 0 - ret i32 %1 -} - -define i32 @__clc_atomic_max_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_max_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile max i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_min_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_min_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile min i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_or_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_or_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile or i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_umax_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_umax_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile umax i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_umin_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_umin_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - 
%0 = atomicrmw volatile umin i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_sub_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_sub_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile sub i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_xchg_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_xchg_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile xchg i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_xor_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_xor_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile xor i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_max.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_max.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_max.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_max.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,12 @@ +#include + +#define IMPL(TYPE, AS, OP) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_##OP(p, val); \ +} + +IMPL(int, global, max) +IMPL(unsigned int, global, umax) +IMPL(int, local, max) 
+IMPL(unsigned int, local, umax) +#undef IMPL diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_min.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_min.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_min.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_min.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,12 @@ +#include + +#define IMPL(TYPE, AS, OP) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_##OP(p, val); \ +} + +IMPL(int, global, min) +IMPL(unsigned int, global, umin) +IMPL(int, local, min) +IMPL(unsigned int, local, umin) +#undef IMPL diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_or.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_or.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_or.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_or.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,12 @@ +#include + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_or(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_sub.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_sub.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_sub.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_sub.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,12 @@ +#include + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { 
\ + return __sync_fetch_and_sub(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_xchg.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_xchg.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_xchg.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_xchg.cl 2017-09-25 21:47:58.000000000 +0000 @@ -1,9 +1,20 @@ #include _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) { - return as_float(atomic_xchg((volatile global int *)p, as_int(val))); + return as_float(atomic_xchg((volatile global uint *)p, as_uint(val))); } _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) { - return as_float(atomic_xchg((volatile local int *)p, as_int(val))); + return as_float(atomic_xchg((volatile local uint *)p, as_uint(val))); } + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \ + return __sync_swap_4(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_xor.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_xor.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/atomic/atomic_xor.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/atomic/atomic_xor.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,12 @@ +#include + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_xor(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff -Nru 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_add.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_add.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_add.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_add.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,16 @@ +#include + +#ifdef cl_khr_int64_base_atomics + +#define IMPL(AS, TYPE) \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_add_8(p, val); \ +} + +IMPL(global, long) +IMPL(global, unsigned long) +IMPL(local, long) +IMPL(local, unsigned long) +#undef IMPL + +#endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,16 @@ +#include + +#ifdef cl_khr_int64_base_atomics + +#define IMPL(AS, TYPE) \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, TYPE val) { \ + return __sync_val_compare_and_swap_8(p, cmp, val); \ +} + +IMPL(global, long) +IMPL(global, unsigned long) +IMPL(local, long) +IMPL(local, unsigned long) +#undef IMPL + +#endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_dec.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_dec.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_dec.cl 1970-01-01 00:00:00.000000000 
+0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_dec.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,16 @@ +#include + +#ifdef cl_khr_int64_base_atomics + +#define IMPL(AS, TYPE) \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile AS TYPE *p) { \ + return atom_sub(p, (TYPE)1); \ +} + +IMPL(global, long) +IMPL(global, unsigned long) +IMPL(local, long) +IMPL(local, unsigned long) +#undef IMPL + +#endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_inc.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_inc.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_inc.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_inc.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,16 @@ +#include + +#ifdef cl_khr_int64_base_atomics + +#define IMPL(AS, TYPE) \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile AS TYPE *p) { \ + return atom_add(p, (TYPE)1); \ +} + +IMPL(global, long) +IMPL(global, unsigned long) +IMPL(local, long) +IMPL(local, unsigned long) +#undef IMPL + +#endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,16 @@ +#include + +#ifdef cl_khr_int64_base_atomics + +#define IMPL(AS, TYPE) \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_sub_8(p, val); \ +} + +IMPL(global, long) +IMPL(global, unsigned long) 
+IMPL(local, long) +IMPL(local, unsigned long) +#undef IMPL + +#endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,16 @@ +#include + +#ifdef cl_khr_int64_base_atomics + +#define IMPL(AS, TYPE) \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) { \ + return __sync_swap_8(p, val); \ +} + +IMPL(global, long) +IMPL(global, unsigned long) +IMPL(local, long) +IMPL(local, unsigned long) +#undef IMPL + +#endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,16 @@ +#include + +#ifdef cl_khr_int64_extended_atomics + +#define IMPL(AS, TYPE) \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_and_8(p, val); \ +} + +IMPL(global, long) +IMPL(global, unsigned long) +IMPL(local, long) +IMPL(local, unsigned long) +#undef IMPL + +#endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl --- 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,21 @@ +#include + +#ifdef cl_khr_int64_extended_atomics + +unsigned long __clc__sync_fetch_and_max_local_8(volatile local long *, long); +unsigned long __clc__sync_fetch_and_max_global_8(volatile global long *, long); +unsigned long __clc__sync_fetch_and_umax_local_8(volatile local unsigned long *, unsigned long); +unsigned long __clc__sync_fetch_and_umax_global_8(volatile global unsigned long *, unsigned long); + +#define IMPL(AS, TYPE, OP) \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \ + return __clc__sync_fetch_and_##OP##_##AS##_8(p, val); \ +} + +IMPL(global, long, max) +IMPL(global, unsigned long, umax) +IMPL(local, long, max) +IMPL(local, unsigned long, umax) +#undef IMPL + +#endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,21 @@ +#include + +#ifdef cl_khr_int64_extended_atomics + +unsigned long __clc__sync_fetch_and_min_local_8(volatile local long *, long); +unsigned long __clc__sync_fetch_and_min_global_8(volatile global long *, long); +unsigned long __clc__sync_fetch_and_umin_local_8(volatile local unsigned long *, unsigned long); +unsigned long __clc__sync_fetch_and_umin_global_8(volatile global unsigned long *, unsigned long); + +#define IMPL(AS, TYPE, OP) \ +_CLC_OVERLOAD _CLC_DEF 
TYPE atom_min(volatile AS TYPE *p, TYPE val) { \ + return __clc__sync_fetch_and_##OP##_##AS##_8(p, val); \ +} + +IMPL(global, long, min) +IMPL(global, unsigned long, umin) +IMPL(local, long, min) +IMPL(local, unsigned long, umin) +#undef IMPL + +#endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,16 @@ +#include + +#ifdef cl_khr_int64_extended_atomics + +#define IMPL(AS, TYPE) \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_or_8(p, val); \ +} + +IMPL(global, long) +IMPL(global, unsigned long) +IMPL(local, long) +IMPL(local, unsigned long) +#undef IMPL + +#endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl 2017-09-25 21:47:58.000000000 +0000 @@ -0,0 +1,16 @@ +#include + +#ifdef cl_khr_int64_extended_atomics + +#define IMPL(AS, TYPE) \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_xor_8(p, val); \ +} + +IMPL(global, long) +IMPL(global, unsigned long) +IMPL(local, long) +IMPL(local, unsigned long) +#undef IMPL + +#endif diff -Nru 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/image/get_image_dim.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/image/get_image_dim.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/image/get_image_dim.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/image/get_image_dim.cl 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -#include - -_CLC_OVERLOAD _CLC_DEF int2 get_image_dim (image2d_t image) { - return (int2)(get_image_width(image), get_image_height(image)); -} -_CLC_OVERLOAD _CLC_DEF int4 get_image_dim (image3d_t image) { - return (int4)(get_image_width(image), get_image_height(image), - get_image_depth(image), 0); -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/add_sat.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/add_sat.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/add_sat.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/add_sat.cl 2017-10-19 16:06:04.000000000 +0000 @@ -12,35 +12,55 @@ _CLC_DECL ulong __clc_add_sat_u64(ulong, ulong); _CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) { - return __clc_add_sat_s8(x, y); + short r = x + y; + return convert_char_sat(r); } _CLC_OVERLOAD _CLC_DEF uchar add_sat(uchar x, uchar y) { - return __clc_add_sat_u8(x, y); + ushort r = x + y; + return convert_uchar_sat(r); } _CLC_OVERLOAD _CLC_DEF short add_sat(short x, short y) { - return __clc_add_sat_s16(x, y); + int r = x + y; + return convert_short_sat(r); } _CLC_OVERLOAD _CLC_DEF ushort add_sat(ushort x, ushort y) { - return __clc_add_sat_u16(x, y); + uint r = x + y; + return convert_ushort_sat(r); } _CLC_OVERLOAD _CLC_DEF int add_sat(int x, int y) { - return __clc_add_sat_s32(x, y); + int r; + if (__builtin_sadd_overflow(x, y, &r)) + // The oveflow can only occur if both are pos or both are neg, + // thus we only need 
to check one operand + return x > 0 ? INT_MAX : INT_MIN; + return r; } _CLC_OVERLOAD _CLC_DEF uint add_sat(uint x, uint y) { - return __clc_add_sat_u32(x, y); + uint r; + if (__builtin_uadd_overflow(x, y, &r)) + return UINT_MAX; + return r; } _CLC_OVERLOAD _CLC_DEF long add_sat(long x, long y) { - return __clc_add_sat_s64(x, y); + long r; + if (__builtin_saddl_overflow(x, y, &r)) + // The oveflow can only occur if both are pos or both are neg, + // thus we only need to check one operand + return x > 0 ? LONG_MAX : LONG_MIN; + return r; } _CLC_OVERLOAD _CLC_DEF ulong add_sat(ulong x, ulong y) { - return __clc_add_sat_u64(x, y); + ulong r; + if (__builtin_uaddl_overflow(x, y, &r)) + return ULONG_MAX; + return r; } _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, add_sat, char, char) diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/add_sat_if.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/add_sat_if.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/add_sat_if.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/add_sat_if.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) - -define i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) - ret i8 %call -} - -declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) - -define i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) - ret i8 %call -} - -declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) - -define i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) - ret i16 %call -} - -declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) - -define i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call 
i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) - ret i16 %call -} - -declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) - -define i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) - ret i32 %call -} - -declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) - -define i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) - ret i32 %call -} - -declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) - -define i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) - ret i64 %call -} - -declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) - -define i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) - ret i64 %call -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/add_sat_impl.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/add_sat_impl.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/add_sat_impl.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/add_sat_impl.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ -declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) -declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8) - -define i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) - %res = extractvalue {i8, i1} %call, 0 - %over = extractvalue {i8, i1} %call, 1 - %x.msb = ashr i8 %x, 7 - %x.limit = xor i8 %x.msb, 127 - %sat = select i1 %over, i8 %x.limit, i8 %res - ret i8 %sat -} - -define i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y) - %res = extractvalue 
{i8, i1} %call, 0 - %over = extractvalue {i8, i1} %call, 1 - %sat = select i1 %over, i8 -1, i8 %res - ret i8 %sat -} - -declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) -declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16) - -define i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %x, i16 %y) - %res = extractvalue {i16, i1} %call, 0 - %over = extractvalue {i16, i1} %call, 1 - %x.msb = ashr i16 %x, 15 - %x.limit = xor i16 %x.msb, 32767 - %sat = select i1 %over, i16 %x.limit, i16 %res - ret i16 %sat -} - -define i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y) - %res = extractvalue {i16, i1} %call, 0 - %over = extractvalue {i16, i1} %call, 1 - %sat = select i1 %over, i16 -1, i16 %res - ret i16 %sat -} - -declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) -declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) - -define i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y) - %res = extractvalue {i32, i1} %call, 0 - %over = extractvalue {i32, i1} %call, 1 - %x.msb = ashr i32 %x, 31 - %x.limit = xor i32 %x.msb, 2147483647 - %sat = select i1 %over, i32 %x.limit, i32 %res - ret i32 %sat -} - -define i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) - %res = extractvalue {i32, i1} %call, 0 - %over = extractvalue {i32, i1} %call, 1 - %sat = select i1 %over, i32 -1, i32 %res - ret i32 %sat -} - -declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) -declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) - -define i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %x, i64 %y) - %res = extractvalue 
{i64, i1} %call, 0 - %over = extractvalue {i64, i1} %call, 1 - %x.msb = ashr i64 %x, 63 - %x.limit = xor i64 %x.msb, 9223372036854775807 - %sat = select i1 %over, i64 %x.limit, i64 %res - ret i64 %sat -} - -define i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y) - %res = extractvalue {i64, i1} %call, 0 - %over = extractvalue {i64, i1} %call, 1 - %sat = select i1 %over, i64 -1, i64 %res - ret i64 %sat -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/clz.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/clz.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/clz.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/clz.cl 2017-10-19 16:06:04.000000000 +0000 @@ -1,46 +1,36 @@ #include #include "../clcmacro.h" -// From clz.ll -_CLC_DECL char __clc_clz_s8(char); -_CLC_DECL uchar __clc_clz_u8(uchar); -_CLC_DECL short __clc_clz_s16(short); -_CLC_DECL ushort __clc_clz_u16(ushort); -_CLC_DECL int __clc_clz_s32(int); -_CLC_DECL uint __clc_clz_u32(uint); -_CLC_DECL long __clc_clz_s64(long); -_CLC_DECL ulong __clc_clz_u64(ulong); - _CLC_OVERLOAD _CLC_DEF char clz(char x) { - return __clc_clz_s8(x); + return clz((ushort)(uchar)x) - 8; } _CLC_OVERLOAD _CLC_DEF uchar clz(uchar x) { - return __clc_clz_u8(x); + return clz((ushort)x) - 8; } _CLC_OVERLOAD _CLC_DEF short clz(short x) { - return __clc_clz_s16(x); + return x ? __builtin_clzs(x) : 16; } _CLC_OVERLOAD _CLC_DEF ushort clz(ushort x) { - return __clc_clz_u16(x); + return x ? __builtin_clzs(x) : 16; } _CLC_OVERLOAD _CLC_DEF int clz(int x) { - return __clc_clz_s32(x); + return x ? __builtin_clz(x) : 32; } _CLC_OVERLOAD _CLC_DEF uint clz(uint x) { - return __clc_clz_u32(x); + return x ? __builtin_clz(x) : 32; } _CLC_OVERLOAD _CLC_DEF long clz(long x) { - return __clc_clz_s64(x); + return x ? 
__builtin_clzl(x) : 64; } _CLC_OVERLOAD _CLC_DEF ulong clz(ulong x) { - return __clc_clz_u64(x); + return x ? __builtin_clzl(x) : 64; } _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, clz, char) diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/clz_if.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/clz_if.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/clz_if.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/clz_if.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -declare i8 @__clc_clz_impl_s8(i8 %x) - -define i8 @__clc_clz_s8(i8 %x) nounwind readnone alwaysinline { - %call = call i8 @__clc_clz_impl_s8(i8 %x) - ret i8 %call -} - -declare i8 @__clc_clz_impl_u8(i8 %x) - -define i8 @__clc_clz_u8(i8 %x) nounwind readnone alwaysinline { - %call = call i8 @__clc_clz_impl_u8(i8 %x) - ret i8 %call -} - -declare i16 @__clc_clz_impl_s16(i16 %x) - -define i16 @__clc_clz_s16(i16 %x) nounwind readnone alwaysinline { - %call = call i16 @__clc_clz_impl_s16(i16 %x) - ret i16 %call -} - -declare i16 @__clc_clz_impl_u16(i16 %x) - -define i16 @__clc_clz_u16(i16 %x) nounwind readnone alwaysinline { - %call = call i16 @__clc_clz_impl_u16(i16 %x) - ret i16 %call -} - -declare i32 @__clc_clz_impl_s32(i32 %x) - -define i32 @__clc_clz_s32(i32 %x) nounwind readnone alwaysinline { - %call = call i32 @__clc_clz_impl_s32(i32 %x) - ret i32 %call -} - -declare i32 @__clc_clz_impl_u32(i32 %x) - -define i32 @__clc_clz_u32(i32 %x) nounwind readnone alwaysinline { - %call = call i32 @__clc_clz_impl_u32(i32 %x) - ret i32 %call -} - -declare i64 @__clc_clz_impl_s64(i64 %x) - -define i64 @__clc_clz_s64(i64 %x) nounwind readnone alwaysinline { - %call = call i64 @__clc_clz_impl_s64(i64 %x) - ret i64 %call -} - -declare i64 @__clc_clz_impl_u64(i64 %x) - -define i64 @__clc_clz_u64(i64 %x) nounwind readnone alwaysinline { - %call = call i64 
@__clc_clz_impl_u64(i64 %x) - ret i64 %call -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/clz_impl.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/clz_impl.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/clz_impl.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/clz_impl.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,44 +0,0 @@ -declare i8 @llvm.ctlz.i8(i8, i1) -declare i16 @llvm.ctlz.i16(i16, i1) -declare i32 @llvm.ctlz.i32(i32, i1) -declare i64 @llvm.ctlz.i64(i64, i1) - -define i8 @__clc_clz_impl_s8(i8 %x) nounwind readnone alwaysinline { - %call = call i8 @llvm.ctlz.i8(i8 %x, i1 0) - ret i8 %call -} - -define i8 @__clc_clz_impl_u8(i8 %x) nounwind readnone alwaysinline { - %call = call i8 @llvm.ctlz.i8(i8 %x, i1 0) - ret i8 %call -} - -define i16 @__clc_clz_impl_s16(i16 %x) nounwind readnone alwaysinline { - %call = call i16 @llvm.ctlz.i16(i16 %x, i1 0) - ret i16 %call -} - -define i16 @__clc_clz_impl_u16(i16 %x) nounwind readnone alwaysinline { - %call = call i16 @llvm.ctlz.i16(i16 %x, i1 0) - ret i16 %call -} - -define i32 @__clc_clz_impl_s32(i32 %x) nounwind readnone alwaysinline { - %call = call i32 @llvm.ctlz.i32(i32 %x, i1 0) - ret i32 %call -} - -define i32 @__clc_clz_impl_u32(i32 %x) nounwind readnone alwaysinline { - %call = call i32 @llvm.ctlz.i32(i32 %x, i1 0) - ret i32 %call -} - -define i64 @__clc_clz_impl_s64(i64 %x) nounwind readnone alwaysinline { - %call = call i64 @llvm.ctlz.i64(i64 %x, i1 0) - ret i64 %call -} - -define i64 @__clc_clz_impl_u64(i64 %x) nounwind readnone alwaysinline { - %call = call i64 @llvm.ctlz.i64(i64 %x, i1 0) - ret i64 %call -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/sub_sat.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/sub_sat.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/sub_sat.cl 
2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/sub_sat.cl 2017-10-19 16:06:04.000000000 +0000 @@ -1,46 +1,54 @@ #include #include "../clcmacro.h" -// From sub_sat.ll -_CLC_DECL char __clc_sub_sat_s8(char, char); -_CLC_DECL uchar __clc_sub_sat_u8(uchar, uchar); -_CLC_DECL short __clc_sub_sat_s16(short, short); -_CLC_DECL ushort __clc_sub_sat_u16(ushort, ushort); -_CLC_DECL int __clc_sub_sat_s32(int, int); -_CLC_DECL uint __clc_sub_sat_u32(uint, uint); -_CLC_DECL long __clc_sub_sat_s64(long, long); -_CLC_DECL ulong __clc_sub_sat_u64(ulong, ulong); - _CLC_OVERLOAD _CLC_DEF char sub_sat(char x, char y) { - return __clc_sub_sat_s8(x, y); + short r = x - y; + return convert_char_sat(r); } _CLC_OVERLOAD _CLC_DEF uchar sub_sat(uchar x, uchar y) { - return __clc_sub_sat_u8(x, y); + short r = x - y; + return convert_uchar_sat(r); } _CLC_OVERLOAD _CLC_DEF short sub_sat(short x, short y) { - return __clc_sub_sat_s16(x, y); + int r = x - y; + return convert_short_sat(r); } _CLC_OVERLOAD _CLC_DEF ushort sub_sat(ushort x, ushort y) { - return __clc_sub_sat_u16(x, y); + int r = x - y; + return convert_ushort_sat(r); } _CLC_OVERLOAD _CLC_DEF int sub_sat(int x, int y) { - return __clc_sub_sat_s32(x, y); + int r; + if (__builtin_ssub_overflow(x, y, &r)) + // The oveflow can only occur in the direction of the first operand + return x > 0 ? INT_MAX : INT_MIN; + return r; } _CLC_OVERLOAD _CLC_DEF uint sub_sat(uint x, uint y) { - return __clc_sub_sat_u32(x, y); + uint r; + if (__builtin_usub_overflow(x, y, &r)) + return 0; + return r; } _CLC_OVERLOAD _CLC_DEF long sub_sat(long x, long y) { - return __clc_sub_sat_s64(x, y); + long r; + if (__builtin_ssubl_overflow(x, y, &r)) + // The oveflow can only occur in the direction of the first operand + return x > 0 ? 
LONG_MAX : LONG_MIN; + return r; } _CLC_OVERLOAD _CLC_DEF ulong sub_sat(ulong x, ulong y) { - return __clc_sub_sat_u64(x, y); + ulong r; + if (__builtin_usubl_overflow(x, y, &r)) + return 0; + return r; } _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, sub_sat, char, char) diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/sub_sat_if.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/sub_sat_if.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/sub_sat_if.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/sub_sat_if.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) - -define i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) - ret i8 %call -} - -declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) - -define i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) - ret i8 %call -} - -declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) - -define i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) - ret i16 %call -} - -declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) - -define i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) - ret i16 %call -} - -declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) - -define i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) - ret i32 %call -} - -declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) - -define i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) - ret i32 %call -} - -declare i64 
@__clc_sub_sat_impl_s64(i64 %x, i64 %y) - -define i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) - ret i64 %call -} - -declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) - -define i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) - ret i64 %call -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/sub_sat_impl.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/sub_sat_impl.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/integer/sub_sat_impl.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/integer/sub_sat_impl.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ -declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8) -declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8) - -define i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) - %res = extractvalue {i8, i1} %call, 0 - %over = extractvalue {i8, i1} %call, 1 - %x.msb = ashr i8 %x, 7 - %x.limit = xor i8 %x.msb, 127 - %sat = select i1 %over, i8 %x.limit, i8 %res - ret i8 %sat -} - -define i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %x, i8 %y) - %res = extractvalue {i8, i1} %call, 0 - %over = extractvalue {i8, i1} %call, 1 - %sat = select i1 %over, i8 0, i8 %res - ret i8 %sat -} - -declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16) -declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16) - -define i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %x, i16 %y) - %res = extractvalue {i16, i1} %call, 0 - %over = extractvalue {i16, i1} %call, 1 - %x.msb = ashr i16 %x, 15 - %x.limit 
= xor i16 %x.msb, 32767 - %sat = select i1 %over, i16 %x.limit, i16 %res - ret i16 %sat -} - -define i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %x, i16 %y) - %res = extractvalue {i16, i1} %call, 0 - %over = extractvalue {i16, i1} %call, 1 - %sat = select i1 %over, i16 0, i16 %res - ret i16 %sat -} - -declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) -declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) - -define i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %x, i32 %y) - %res = extractvalue {i32, i1} %call, 0 - %over = extractvalue {i32, i1} %call, 1 - %x.msb = ashr i32 %x, 31 - %x.limit = xor i32 %x.msb, 2147483647 - %sat = select i1 %over, i32 %x.limit, i32 %res - ret i32 %sat -} - -define i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y) - %res = extractvalue {i32, i1} %call, 0 - %over = extractvalue {i32, i1} %call, 1 - %sat = select i1 %over, i32 0, i32 %res - ret i32 %sat -} - -declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) -declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) - -define i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %x, i64 %y) - %res = extractvalue {i64, i1} %call, 0 - %over = extractvalue {i64, i1} %call, 1 - %x.msb = ashr i64 %x, 63 - %x.limit = xor i64 %x.msb, 9223372036854775807 - %sat = select i1 %over, i64 %x.limit, i64 %res - ret i64 %sat -} - -define i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %x, i64 %y) - %res = extractvalue {i64, i1} %call, 0 - %over = extractvalue {i64, i1} %call, 1 - %sat = select i1 %over, i64 0, i64 %res - ret i64 %sat -} diff -Nru 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/SOURCES libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/SOURCES --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/generic/lib/SOURCES 2017-09-12 20:06:29.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/generic/lib/SOURCES 2017-10-19 16:06:04.000000000 +0000 @@ -4,8 +4,15 @@ async/async_work_group_strided_copy.cl async/prefetch.cl async/wait_group_events.cl +atomic/atomic_add.cl +atomic/atomic_and.cl +atomic/atomic_cmpxchg.cl +atomic/atomic_max.cl +atomic/atomic_min.cl +atomic/atomic_or.cl +atomic/atomic_sub.cl +atomic/atomic_xor.cl atomic/atomic_xchg.cl -atomic/atomic_impl.ll cl_khr_global_int32_base_atomics/atom_add.cl cl_khr_global_int32_base_atomics/atom_cmpxchg.cl cl_khr_global_int32_base_atomics/atom_dec.cl @@ -28,6 +35,17 @@ cl_khr_local_int32_extended_atomics/atom_min.cl cl_khr_local_int32_extended_atomics/atom_or.cl cl_khr_local_int32_extended_atomics/atom_xor.cl +cl_khr_int64_base_atomics/atom_add.cl +cl_khr_int64_base_atomics/atom_cmpxchg.cl +cl_khr_int64_base_atomics/atom_dec.cl +cl_khr_int64_base_atomics/atom_inc.cl +cl_khr_int64_base_atomics/atom_sub.cl +cl_khr_int64_base_atomics/atom_xchg.cl +cl_khr_int64_extended_atomics/atom_and.cl +cl_khr_int64_extended_atomics/atom_max.cl +cl_khr_int64_extended_atomics/atom_min.cl +cl_khr_int64_extended_atomics/atom_or.cl +cl_khr_int64_extended_atomics/atom_xor.cl convert.cl common/degrees.cl common/mix.cl @@ -46,11 +64,7 @@ integer/abs.cl integer/abs_diff.cl integer/add_sat.cl -integer/add_sat_if.ll -integer/add_sat_impl.ll integer/clz.cl -integer/clz_if.ll -integer/clz_impl.ll integer/hadd.cl integer/mad24.cl integer/mad_sat.cl @@ -59,8 +73,6 @@ integer/rhadd.cl integer/rotate.cl integer/sub_sat.cl -integer/sub_sat_if.ll -integer/sub_sat_impl.ll integer/upsample.cl math/acos.cl math/acosh.cl @@ -149,4 +161,3 @@ shared/vstore.cl workitem/get_global_id.cl workitem/get_global_size.cl -image/get_image_dim.cl diff -Nru 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/integer/add_sat.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/integer/add_sat.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/integer/add_sat.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/integer/add_sat.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) - -define ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) - ret i8 %call -} - -declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) - -define ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) - ret i8 %call -} - -declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) - -define ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) - ret i16 %call -} - -declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) - -define ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) - ret i16 %call -} - -declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) - -define ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) - ret i32 %call -} - -declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) - -define ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) - ret i32 %call -} - -declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) - -define ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) - ret i64 %call -} - -declare i64 @__clc_add_sat_impl_u64(i64 %x, 
i64 %y) - -define ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) - ret i64 %call -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/integer/sub_sat.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/integer/sub_sat.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/integer/sub_sat.ll 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/integer/sub_sat.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) - -define ptx_device i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) - ret i8 %call -} - -declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) - -define ptx_device i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline { - %call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) - ret i8 %call -} - -declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) - -define ptx_device i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) - ret i16 %call -} - -declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) - -define ptx_device i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline { - %call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) - ret i16 %call -} - -declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) - -define ptx_device i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) - ret i32 %call -} - -declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) - -define ptx_device i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline { - %call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) - ret i32 %call -} - -declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) - -define ptx_device i64 
@__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) - ret i64 %call -} - -declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) - -define ptx_device i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline { - %call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) - ret i64 %call -} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/math/nextafter.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/math/nextafter.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/math/nextafter.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/math/nextafter.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,10 @@ +#include +#include "../lib/clcmacro.h" +#include + +_CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float) + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +_CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __clc_nextafter, double, double) +#endif diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/OVERRIDES libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/OVERRIDES --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/OVERRIDES 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/OVERRIDES 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -integer/add_sat_if.ll -integer/sub_sat_if.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/shared/vload_half_helpers.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/shared/vload_half_helpers.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/shared/vload_half_helpers.ll 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/shared/vload_half_helpers.ll 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,23 @@ +define float @__clc_vload_half_float_helper__private(half addrspace(0)* 
nocapture %ptr) nounwind alwaysinline { + %data = load half, half addrspace(0)* %ptr + %res = fpext half %data to float + ret float %res +} + +define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline { + %data = load half, half addrspace(1)* %ptr + %res = fpext half %data to float + ret float %res +} + +define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline { + %data = load half, half addrspace(3)* %ptr + %res = fpext half %data to float + ret float %res +} + +define float @__clc_vload_half_float_helper__constant(half addrspace(4)* nocapture %ptr) nounwind alwaysinline { + %data = load half, half addrspace(4)* %ptr + %res = fpext half %data to float + ret float %res +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/shared/vstore_half_helpers.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/shared/vstore_half_helpers.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/shared/vstore_half_helpers.ll 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/shared/vstore_half_helpers.ll 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,35 @@ +define void @__clc_vstore_half_float_helper__private(float %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline { + %res = fptrunc float %data to half + store half %res, half addrspace(0)* %ptr + ret void +} + +define void @__clc_vstore_half_float_helper__global(float %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline { + %res = fptrunc float %data to half + store half %res, half addrspace(1)* %ptr + ret void +} + +define void @__clc_vstore_half_float_helper__local(float %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline { + %res = fptrunc float %data to half + store half %res, half addrspace(3)* %ptr + ret void +} + +define void @__clc_vstore_half_double_helper__private(double %data, half addrspace(0)* nocapture 
%ptr) nounwind alwaysinline { + %res = fptrunc double %data to half + store half %res, half addrspace(0)* %ptr + ret void +} + +define void @__clc_vstore_half_double_helper__global(double %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline { + %res = fptrunc double %data to half + store half %res, half addrspace(1)* %ptr + ret void +} + +define void @__clc_vstore_half_double_helper__local(double %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline { + %res = fptrunc double %data to half + store half %res, half addrspace(3)* %ptr + ret void +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/SOURCES libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/SOURCES --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/SOURCES 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/SOURCES 2017-10-19 16:06:04.000000000 +0000 @@ -1,2 +1 @@ -integer/add_sat.ll -integer/sub_sat.ll \ No newline at end of file +math/nextafter.cl diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/SOURCES_3.9 libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/SOURCES_3.9 --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/SOURCES_3.9 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/SOURCES_3.9 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,2 @@ +shared/vload_half_helpers.ll +shared/vstore_half_helpers.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/SOURCES_4.0 libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/SOURCES_4.0 --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/SOURCES_4.0 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/SOURCES_4.0 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,2 @@ +shared/vload_half_helpers.ll +shared/vstore_half_helpers.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/SOURCES_5.0 
libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/SOURCES_5.0 --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx/lib/SOURCES_5.0 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx/lib/SOURCES_5.0 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,2 @@ +shared/vload_half_helpers.ll +shared/vstore_half_helpers.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx-nvidiacl/lib/mem_fence/fence.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx-nvidiacl/lib/mem_fence/fence.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx-nvidiacl/lib/mem_fence/fence.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx-nvidiacl/lib/mem_fence/fence.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,15 @@ +#include + +_CLC_DEF void mem_fence(cl_mem_fence_flags flags) { + if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE)) + __nvvm_membar_cta(); +} + +// We do not have separate mechanisms for read and write fences. 
+_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags) { + mem_fence(flags); +} + +_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags) { + mem_fence(flags); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx-nvidiacl/lib/SOURCES libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx-nvidiacl/lib/SOURCES --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx-nvidiacl/lib/SOURCES 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx-nvidiacl/lib/SOURCES 2017-10-19 16:06:04.000000000 +0000 @@ -1,3 +1,4 @@ +mem_fence/fence.cl synchronization/barrier.cl workitem/get_global_id.cl workitem/get_group_id.cl diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx-nvidiacl/lib/synchronization/barrier.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx-nvidiacl/lib/synchronization/barrier.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/ptx-nvidiacl/lib/synchronization/barrier.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/ptx-nvidiacl/lib/synchronization/barrier.cl 2017-10-19 16:06:04.000000000 +0000 @@ -1,8 +1,6 @@ #include _CLC_DEF void barrier(cl_mem_fence_flags flags) { - if (flags & CLK_LOCAL_MEM_FENCE) { - __syncthreads(); - } + __syncthreads(); } diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_attributes_impl.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_attributes_impl.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_attributes_impl.ll 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_attributes_impl.ll 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,87 @@ +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque + +declare i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)*) nounwind readnone +declare i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t 
addrspace(1)*) nounwind readnone + +declare [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)*) nounwind readnone +declare [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)*) nounwind readnone + +declare [2 x i32] @llvm.OpenCL.image.get.format.2d( + %opencl.image2d_t addrspace(1)*) nounwind readnone +declare [2 x i32] @llvm.OpenCL.image.get.format.3d( + %opencl.image3d_t addrspace(1)*) nounwind readnone + +define i32 @__clc_get_image_width_2d( + %opencl.image2d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)* %img) + %2 = extractvalue [3 x i32] %1, 0 + ret i32 %2 +} +define i32 @__clc_get_image_width_3d( + %opencl.image3d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %img) + %2 = extractvalue [3 x i32] %1, 0 + ret i32 %2 +} + +define i32 @__clc_get_image_height_2d( + %opencl.image2d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)* %img) + %2 = extractvalue [3 x i32] %1, 1 + ret i32 %2 +} +define i32 @__clc_get_image_height_3d( + %opencl.image3d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %img) + %2 = extractvalue [3 x i32] %1, 1 + ret i32 %2 +} + +define i32 @__clc_get_image_depth_3d( + %opencl.image3d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %img) + %2 = extractvalue [3 x i32] %1, 2 + ret i32 %2 +} + +define i32 @__clc_get_image_channel_data_type_2d( + %opencl.image2d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d( + %opencl.image2d_t addrspace(1)* %img) + %2 = extractvalue [2 x i32] %1, 0 + ret i32 %2 +} +define i32 
@__clc_get_image_channel_data_type_3d( + %opencl.image3d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d( + %opencl.image3d_t addrspace(1)* %img) + %2 = extractvalue [2 x i32] %1, 0 + ret i32 %2 +} + +define i32 @__clc_get_image_channel_order_2d( + %opencl.image2d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d( + %opencl.image2d_t addrspace(1)* %img) + %2 = extractvalue [2 x i32] %1, 1 + ret i32 %2 +} +define i32 @__clc_get_image_channel_order_3d( + %opencl.image3d_t addrspace(1)* nocapture %img) #0 { + %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d( + %opencl.image3d_t addrspace(1)* %img) + %2 = extractvalue [2 x i32] %1, 1 + ret i32 %2 +} + +attributes #0 = { nounwind readnone alwaysinline } diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_channel_data_type.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_channel_data_type.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_channel_data_type.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_channel_data_type.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,13 @@ +#include + +_CLC_DECL int __clc_get_image_channel_data_type_2d(image2d_t); +_CLC_DECL int __clc_get_image_channel_data_type_3d(image3d_t); + +_CLC_OVERLOAD _CLC_DEF int +get_image_channel_data_type(image2d_t image) { + return __clc_get_image_channel_data_type_2d(image); +} +_CLC_OVERLOAD _CLC_DEF int +get_image_channel_data_type(image3d_t image) { + return __clc_get_image_channel_data_type_3d(image); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_channel_order.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_channel_order.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_channel_order.cl 1970-01-01 
00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_channel_order.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,13 @@ +#include + +_CLC_DECL int __clc_get_image_channel_order_2d(image2d_t); +_CLC_DECL int __clc_get_image_channel_order_3d(image3d_t); + +_CLC_OVERLOAD _CLC_DEF int +get_image_channel_order(image2d_t image) { + return __clc_get_image_channel_order_2d(image); +} +_CLC_OVERLOAD _CLC_DEF int +get_image_channel_order(image3d_t image) { + return __clc_get_image_channel_order_3d(image); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_depth.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_depth.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_depth.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_depth.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,8 @@ +#include + +_CLC_DECL int __clc_get_image_depth_3d(image3d_t); + +_CLC_OVERLOAD _CLC_DEF int +get_image_depth(image3d_t image) { + return __clc_get_image_depth_3d(image); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_dim.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_dim.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_dim.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_dim.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,9 @@ +#include + +_CLC_OVERLOAD _CLC_DEF int2 get_image_dim (image2d_t image) { + return (int2)(get_image_width(image), get_image_height(image)); +} +_CLC_OVERLOAD _CLC_DEF int4 get_image_dim (image3d_t image) { + return (int4)(get_image_width(image), get_image_height(image), + get_image_depth(image), 0); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_height.cl 
libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_height.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_height.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_height.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,13 @@ +#include + +_CLC_DECL int __clc_get_image_height_2d(image2d_t); +_CLC_DECL int __clc_get_image_height_3d(image3d_t); + +_CLC_OVERLOAD _CLC_DEF int +get_image_height(image2d_t image) { + return __clc_get_image_height_2d(image); +} +_CLC_OVERLOAD _CLC_DEF int +get_image_height(image3d_t image) { + return __clc_get_image_height_3d(image); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_width.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_width.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/get_image_width.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/get_image_width.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,13 @@ +#include + +_CLC_DECL int __clc_get_image_width_2d(image2d_t); +_CLC_DECL int __clc_get_image_width_3d(image3d_t); + +_CLC_OVERLOAD _CLC_DEF int +get_image_width(image2d_t image) { + return __clc_get_image_width_2d(image); +} +_CLC_OVERLOAD _CLC_DEF int +get_image_width(image3d_t image) { + return __clc_get_image_width_3d(image); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/read_imagef.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/read_imagef.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/read_imagef.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/read_imagef.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,14 @@ +#include + +_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); + +_CLC_OVERLOAD _CLC_DEF float4 
read_imagef(image2d_t image, sampler_t sampler, + int2 coord) { + float2 coord_float = (float2)(coord.x, coord.y); + return __clc_read_imagef_tex(image, sampler, coord_float); +} + +_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler, + float2 coord) { + return __clc_read_imagef_tex(image, sampler, coord); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/read_imagei.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/read_imagei.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/read_imagei.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/read_imagei.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,23 @@ +#include + +_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); + +int4 __clc_reinterpret_v4f_to_v4i(float4 v) { + union { + int4 v4i; + float4 v4f; + } res = { .v4f = v}; + return res.v4i; +} + +_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, + int2 coord) { + float2 coord_float = (float2)(coord.x, coord.y); + return __clc_reinterpret_v4f_to_v4i( + __clc_read_imagef_tex(image, sampler, coord_float)); +} +_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler, + float2 coord) { + return __clc_reinterpret_v4f_to_v4i( + __clc_read_imagef_tex(image, sampler, coord)); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/read_image_impl.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/read_image_impl.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/read_image_impl.ll 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/read_image_impl.ll 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,46 @@ +%opencl.image2d_t = type opaque + +declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, + i32, i32, i32) readnone +declare i32 
@llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)*) nounwind readnone +declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) readnone + +define <4 x float> @__clc_v4f_from_v2f(<2 x float> %v) alwaysinline { + %e0 = extractelement <2 x float> %v, i32 0 + %e1 = extractelement <2 x float> %v, i32 1 + %res.0 = insertelement <4 x float> undef, float %e0, i32 0 + %res.1 = insertelement <4 x float> %res.0, float %e1, i32 1 + %res.2 = insertelement <4 x float> %res.1, float 0.0, i32 2 + %res.3 = insertelement <4 x float> %res.2, float 0.0, i32 3 + ret <4 x float> %res.3 +} + +define <4 x float> @__clc_read_imagef_tex( + %opencl.image2d_t addrspace(1)* nocapture %img, + i32 %sampler, <2 x float> %coord) alwaysinline { +entry: + %coord_v4 = call <4 x float> @__clc_v4f_from_v2f(<2 x float> %coord) + %smp_id = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %sampler) + %img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %img) + %tex_id = add i32 %img_id, 2 ; First 2 IDs are reserved. + + %coord_norm = and i32 %sampler, 1 + %is_norm = icmp eq i32 %coord_norm, 1 + br i1 %is_norm, label %NormCoord, label %UnnormCoord +NormCoord: + %data.norm = call <4 x float> @llvm.R600.tex( + <4 x float> %coord_v4, + i32 0, i32 0, i32 0, ; Offset. + i32 2, i32 %smp_id, + i32 1, i32 1, i32 1, i32 1) ; Normalized coords. + ret <4 x float> %data.norm +UnnormCoord: + %data.unnorm = call <4 x float> @llvm.R600.tex( + <4 x float> %coord_v4, + i32 0, i32 0, i32 0, ; Offset. + i32 %tex_id, i32 %smp_id, + i32 0, i32 0, i32 0, i32 0) ; Unnormalized coords. 
+ ret <4 x float> %data.unnorm +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/read_imageui.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/read_imageui.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/read_imageui.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/read_imageui.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,23 @@ +#include + +_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2); + +uint4 __clc_reinterpret_v4f_to_v4ui(float4 v) { + union { + uint4 v4ui; + float4 v4f; + } res = { .v4f = v}; + return res.v4ui; +} + +_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, + int2 coord) { + float2 coord_float = (float2)(coord.x, coord.y); + return __clc_reinterpret_v4f_to_v4ui( + __clc_read_imagef_tex(image, sampler, coord_float)); +} +_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler, + float2 coord) { + return __clc_reinterpret_v4f_to_v4ui( + __clc_read_imagef_tex(image, sampler, coord)); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/write_imagef.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/write_imagef.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/write_imagef.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/write_imagef.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,9 @@ +#include + +_CLC_DECL void __clc_write_imagef_2d(image2d_t image, int2 coord, float4 color); + +_CLC_OVERLOAD _CLC_DEF void +write_imagef(image2d_t image, int2 coord, float4 color) +{ + __clc_write_imagef_2d(image, coord, color); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/write_imagei.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/write_imagei.cl --- 
libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/write_imagei.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/write_imagei.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,9 @@ +#include + +_CLC_DECL void __clc_write_imagei_2d(image2d_t image, int2 coord, int4 color); + +_CLC_OVERLOAD _CLC_DEF void +write_imagei(image2d_t image, int2 coord, int4 color) +{ + __clc_write_imagei_2d(image, coord, color); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/write_image_impl.ll libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/write_image_impl.ll --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/write_image_impl.ll 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/write_image_impl.ll 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,52 @@ +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque + +declare i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)*) nounwind readnone +declare i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)*) nounwind readnone + +declare void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord, i32 %rat_id) + +define void @__clc_write_imageui_2d( + %opencl.image2d_t addrspace(1)* nocapture %img, + <2 x i32> %coord, <4 x i32> %color) #0 { + + ; Coordinate int2 -> int4. + %e0 = extractelement <2 x i32> %coord, i32 0 + %e1 = extractelement <2 x i32> %coord, i32 1 + %coord.0 = insertelement <4 x i32> undef, i32 %e0, i32 0 + %coord.1 = insertelement <4 x i32> %coord.0, i32 %e1, i32 1 + %coord.2 = insertelement <4 x i32> %coord.1, i32 0, i32 2 + %coord.3 = insertelement <4 x i32> %coord.2, i32 0, i32 3 + + ; Get RAT ID. + %img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %img) + %rat_id = add i32 %img_id, 1 + + ; Call store intrinsic. 
+ call void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord.3, i32 %rat_id) + ret void +} + +define void @__clc_write_imagei_2d( + %opencl.image2d_t addrspace(1)* nocapture %img, + <2 x i32> %coord, <4 x i32> %color) #0 { + call void @__clc_write_imageui_2d( + %opencl.image2d_t addrspace(1)* nocapture %img, + <2 x i32> %coord, <4 x i32> %color) + ret void +} + +define void @__clc_write_imagef_2d( + %opencl.image2d_t addrspace(1)* nocapture %img, + <2 x i32> %coord, <4 x float> %color) #0 { + %color.i32 = bitcast <4 x float> %color to <4 x i32> + call void @__clc_write_imageui_2d( + %opencl.image2d_t addrspace(1)* nocapture %img, + <2 x i32> %coord, <4 x i32> %color.i32) + ret void +} + +attributes #0 = { alwaysinline } diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/write_imageui.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/write_imageui.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/image/write_imageui.cl 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/image/write_imageui.cl 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,9 @@ +#include + +_CLC_DECL void __clc_write_imageui_2d(image2d_t image, int2 coord, uint4 color); + +_CLC_OVERLOAD _CLC_DEF void +write_imageui(image2d_t image, int2 coord, uint4 color) +{ + __clc_write_imageui_2d(image, coord, color); +} diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/SOURCES_3.9 libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/SOURCES_3.9 --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/SOURCES_3.9 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/SOURCES_3.9 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,15 @@ +image/get_image_dim.cl +image/get_image_width.cl +image/get_image_height.cl +image/get_image_depth.cl +image/get_image_channel_data_type.cl +image/get_image_channel_order.cl +image/get_image_attributes_impl.ll 
+image/read_imagef.cl +image/read_imagei.cl +image/read_imageui.cl +image/read_image_impl.ll +image/write_imagef.cl +image/write_imagei.cl +image/write_imageui.cl +image/write_image_impl.ll diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/workitem/get_work_dim.cl libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/workitem/get_work_dim.cl --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/r600/lib/workitem/get_work_dim.cl 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/r600/lib/workitem/get_work_dim.cl 2017-10-19 16:06:04.000000000 +0000 @@ -1,6 +1,6 @@ #include -_CLC_DEF uint get_work_dim() +_CLC_DEF uint get_work_dim(void) { __attribute__((address_space(7))) uint * ptr = (__attribute__((address_space(7))) uint *) diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/.travis.yml libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/.travis.yml --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/.travis.yml 1970-01-01 00:00:00.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/.travis.yml 2017-10-19 16:06:04.000000000 +0000 @@ -0,0 +1,64 @@ +language: cpp + +sudo: false +dist: trusty + +cache: + apt: true + + +matrix: + include: + - env: + - LABEL="make gcc LLVM-3.9" + - LLVM_VERSION=3.9 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + - CHECK_FILES="barts-r600--.bc cayman-r600--.bc cedar-r600--.bc cypress-r600--.bc tahiti-amdgcn--.bc amdgcn--amdhsa.bc nvptx--nvidiacl.bc nvptx64--nvidiacl.bc" + addons: + apt: + sources: + - llvm-toolchain-trusty-3.9 + packages: + - libedit-dev + - g++-4.8 + # From sources above + - llvm-3.9-dev + - clang-3.9 + - env: + - LABEL="make gcc LLVM-4.0" + - LLVM_VERSION=4.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + - CHECK_FILES="barts-r600--.bc cayman-r600--.bc cedar-r600--.bc cypress-r600--.bc tahiti-amdgcn--.bc amdgcn--amdhsa.bc nvptx--nvidiacl.bc nvptx64--nvidiacl.bc" + addons: + apt: + sources: + - llvm-toolchain-trusty-4.0 + packages: + - libedit-dev 
+ - g++-4.8 + # From sources above + - llvm-4.0-dev + - clang-4.0 + - env: + - LABEL="make gcc LLVM-5.0" + - LLVM_VERSION=5.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + - CHECK_FILES="barts-r600--.bc cayman-r600--.bc cedar-r600--.bc cypress-r600--.bc tahiti-amdgcn--.bc amdgcn--amdhsa.bc nvptx--nvidiacl.bc nvptx64--nvidiacl.bc" + addons: + apt: + sources: + - llvm-toolchain-trusty-5.0 + packages: + - libedit-dev + - g++-4.8 + # From sources above + - llvm-5.0-dev + - clang-5.0 + +script: + - $PYTHON ./configure.py --with-llvm-config=$LLVM_CONFIG --with-cxx-compiler=$CXX && make -j4 + - ret=0; + for f in $CHECK_FILES; do + ./check_external_calls.sh built_libs/$f || ret=1; + done; + test $ret -eq 0 diff -Nru libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/utils/prepare-builtins.cpp libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/utils/prepare-builtins.cpp --- libclc-0.2.0+git20170912.1707.3ab9165~x~padoka0/utils/prepare-builtins.cpp 2017-08-10 16:12:31.000000000 +0000 +++ libclc-0.2.0+git20171019.1407.b61116b~x~padoka0/utils/prepare-builtins.cpp 2017-10-19 16:06:04.000000000 +0000 @@ -1,5 +1,10 @@ +#if HAVE_LLVM > 0x0390 #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" +#else +#include "llvm/Bitcode/ReaderWriter.h" +#endif + #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/LLVMContext.h" @@ -41,8 +46,12 @@ } else { std::unique_ptr &BufferPtr = BufferOrErr.get(); ErrorOr> ModuleOrErr = +#if HAVE_LLVM > 0x0390 expectedToErrorOrAndEmitErrors(Context, parseBitcodeFile(BufferPtr.get()->getMemBufferRef(), Context)); +#else + parseBitcodeFile(BufferPtr.get()->getMemBufferRef(), Context); +#endif if (std::error_code ec = ModuleOrErr.getError()) ErrorMessage = ec.message(); @@ -84,8 +93,13 @@ } std::error_code EC; - std::unique_ptr Out - (new tool_output_file(OutputFilename, EC, sys::fs::F_None)); +#if HAVE_LLVM >= 0x0600 + std::unique_ptr Out( + new ToolOutputFile(OutputFilename, EC, sys::fs::F_None)); 
+#else + std::unique_ptr Out( + new tool_output_file(OutputFilename, EC, sys::fs::F_None)); +#endif if (EC) { errs() << EC.message() << '\n'; exit(1);