diff -Nru libclc-0.2.0+git20150813/amdgcn/lib/math/ldexp.cl libclc-0.2.0+git20170213/amdgcn/lib/math/ldexp.cl
--- libclc-0.2.0+git20150813/amdgcn/lib/math/ldexp.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn/lib/math/ldexp.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include
+
+#include "../../../generic/lib/clcmacro.h"
+
+#ifdef __HAS_LDEXPF__
+#define BUILTINF __builtin_amdgcn_ldexpf
+#else
+#include "math/clc_ldexp.h"
+#define BUILTINF __clc_ldexp
+#endif
+
+// This defines all the ldexp(floatN, intN) variants.
+_CLC_DEFINE_BINARY_BUILTIN(float, ldexp, BUILTINF, float, int);
+
+#ifdef cl_khr_fp64
+ #pragma OPENCL EXTENSION cl_khr_fp64 : enable
+ // This defines all the ldexp(doubleN, intN) variants.
+ _CLC_DEFINE_BINARY_BUILTIN(double, ldexp, __builtin_amdgcn_ldexp, double, int);
+#endif
+
+// This defines all the ldexp(GENTYPE, int);
+#define __CLC_BODY <../../../generic/lib/math/ldexp.inc>
+#include
+
+#undef BUILTINF
diff -Nru libclc-0.2.0+git20150813/amdgcn/lib/SOURCES libclc-0.2.0+git20170213/amdgcn/lib/SOURCES
--- libclc-0.2.0+git20150813/amdgcn/lib/SOURCES 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn/lib/SOURCES 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,9 @@
+math/ldexp.cl
+synchronization/barrier_impl.ll
+workitem/get_global_offset.cl
+workitem/get_group_id.cl
+workitem/get_global_size.ll
+workitem/get_local_id.cl
+workitem/get_local_size.ll
+workitem/get_num_groups.ll
+workitem/get_work_dim.cl
diff -Nru libclc-0.2.0+git20150813/amdgcn/lib/synchronization/barrier_impl.ll libclc-0.2.0+git20170213/amdgcn/lib/synchronization/barrier_impl.ll
--- libclc-0.2.0+git20150813/amdgcn/lib/synchronization/barrier_impl.ll 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn/lib/synchronization/barrier_impl.ll 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,32 @@
+declare i32 @__clc_clk_local_mem_fence() #1
+declare i32 @__clc_clk_global_mem_fence() #1
+declare void @llvm.amdgcn.s.barrier() #0
+
+define void @barrier(i32 %flags) #2 {
+barrier_local_test:
+ %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
+ %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
+ %1 = icmp ne i32 %0, 0
+ br i1 %1, label %barrier_local, label %barrier_global_test
+
+barrier_local:
+ call void @llvm.amdgcn.s.barrier()
+ br label %barrier_global_test
+
+barrier_global_test:
+ %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
+ %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
+ %3 = icmp ne i32 %2, 0
+ br i1 %3, label %barrier_global, label %done
+
+barrier_global:
+ call void @llvm.amdgcn.s.barrier()
+ br label %done
+
+done:
+ ret void
+}
+
+attributes #0 = { nounwind convergent }
+attributes #1 = { nounwind alwaysinline }
+attributes #2 = { nounwind convergent alwaysinline }
diff -Nru libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_global_offset.cl libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_global_offset.cl
--- libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_global_offset.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_global_offset.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,11 @@
+#include
+
+_CLC_DEF size_t get_global_offset(uint dim)
+{
+ __attribute__((address_space(2))) uint * ptr =
+ (__attribute__((address_space(2))) uint *)
+ __builtin_amdgcn_implicitarg_ptr();
+ if (dim < 3)
+ return ptr[dim + 1];
+ return 0;
+}
diff -Nru libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_global_size.ll libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_global_size.ll
--- libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_global_size.ll 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_global_size.ll 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,21 @@
+declare i32 @llvm.r600.read.global.size.x() nounwind readnone
+declare i32 @llvm.r600.read.global.size.y() nounwind readnone
+declare i32 @llvm.r600.read.global.size.z() nounwind readnone
+
+define i64 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
+ switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
+x_dim:
+ %x = call i32 @llvm.r600.read.global.size.x()
+ %x.ext = zext i32 %x to i64
+ ret i64 %x.ext
+y_dim:
+ %y = call i32 @llvm.r600.read.global.size.y()
+ %y.ext = zext i32 %y to i64
+ ret i64 %y.ext
+z_dim:
+ %z = call i32 @llvm.r600.read.global.size.z()
+ %z.ext = zext i32 %z to i64
+ ret i64 %z.ext
+default:
+ ret i64 1
+}
diff -Nru libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_group_id.cl libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_group_id.cl
--- libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_group_id.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_group_id.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,11 @@
+#include
+
+_CLC_DEF size_t get_group_id(uint dim)
+{
+ switch(dim) {
+ case 0: return __builtin_amdgcn_workgroup_id_x();
+ case 1: return __builtin_amdgcn_workgroup_id_y();
+ case 2: return __builtin_amdgcn_workgroup_id_z();
+ default: return 1;
+ }
+}
diff -Nru libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_local_id.cl libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_local_id.cl
--- libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_local_id.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_local_id.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,11 @@
+#include
+
+_CLC_DEF size_t get_local_id(uint dim)
+{
+ switch(dim) {
+ case 0: return __builtin_amdgcn_workitem_id_x();
+ case 1: return __builtin_amdgcn_workitem_id_y();
+ case 2: return __builtin_amdgcn_workitem_id_z();
+ default: return 1;
+ }
+}
diff -Nru libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_local_size.ll libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_local_size.ll
--- libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_local_size.ll 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_local_size.ll 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,21 @@
+declare i32 @llvm.r600.read.local.size.x() nounwind readnone
+declare i32 @llvm.r600.read.local.size.y() nounwind readnone
+declare i32 @llvm.r600.read.local.size.z() nounwind readnone
+
+define i64 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
+ switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
+x_dim:
+ %x = call i32 @llvm.r600.read.local.size.x()
+ %x.ext = zext i32 %x to i64
+ ret i64 %x.ext
+y_dim:
+ %y = call i32 @llvm.r600.read.local.size.y()
+ %y.ext = zext i32 %y to i64
+ ret i64 %y.ext
+z_dim:
+ %z = call i32 @llvm.r600.read.local.size.z()
+ %z.ext = zext i32 %z to i64
+ ret i64 %z.ext
+default:
+ ret i64 1
+}
diff -Nru libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_num_groups.ll libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_num_groups.ll
--- libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_num_groups.ll 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_num_groups.ll 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,21 @@
+declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
+declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
+declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
+
+define i64 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
+ switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
+x_dim:
+ %x = call i32 @llvm.r600.read.ngroups.x()
+ %x.ext = zext i32 %x to i64
+ ret i64 %x.ext
+y_dim:
+ %y = call i32 @llvm.r600.read.ngroups.y()
+ %y.ext = zext i32 %y to i64
+ ret i64 %y.ext
+z_dim:
+ %z = call i32 @llvm.r600.read.ngroups.z()
+ %z.ext = zext i32 %z to i64
+ ret i64 %z.ext
+default:
+ ret i64 1
+}
diff -Nru libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_work_dim.cl libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_work_dim.cl
--- libclc-0.2.0+git20150813/amdgcn/lib/workitem/get_work_dim.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn/lib/workitem/get_work_dim.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,9 @@
+#include
+
+_CLC_DEF uint get_work_dim()
+{
+ __attribute__((address_space(2))) uint * ptr =
+ (__attribute__((address_space(2))) uint *)
+ __builtin_amdgcn_implicitarg_ptr();
+ return ptr[0];
+}
diff -Nru libclc-0.2.0+git20150813/amdgcn-amdhsa/lib/OVERRIDES libclc-0.2.0+git20170213/amdgcn-amdhsa/lib/OVERRIDES
--- libclc-0.2.0+git20150813/amdgcn-amdhsa/lib/OVERRIDES 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn-amdhsa/lib/OVERRIDES 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1 @@
+workitem/get_num_groups.ll
diff -Nru libclc-0.2.0+git20150813/amdgcn-amdhsa/lib/SOURCES libclc-0.2.0+git20170213/amdgcn-amdhsa/lib/SOURCES
--- libclc-0.2.0+git20150813/amdgcn-amdhsa/lib/SOURCES 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn-amdhsa/lib/SOURCES 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,3 @@
+workitem/get_global_size.ll
+workitem/get_local_size.ll
+workitem/get_num_groups.cl
diff -Nru libclc-0.2.0+git20150813/amdgcn-amdhsa/lib/workitem/get_global_size.ll libclc-0.2.0+git20170213/amdgcn-amdhsa/lib/workitem/get_global_size.ll
--- libclc-0.2.0+git20150813/amdgcn-amdhsa/lib/workitem/get_global_size.ll 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn-amdhsa/lib/workitem/get_global_size.ll 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,39 @@
+declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+
+define i64 @get_global_size(i32 %dim) #1 {
+ %dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+ switch i32 %dim, label %default [
+ i32 0, label %x
+ i32 1, label %y
+ i32 2, label %z
+ ]
+
+x:
+ %ptr_x = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i64 12
+ %ptr_x32 = bitcast i8 addrspace(2)* %ptr_x to i32 addrspace(2)*
+ %x32 = load i32, i32 addrspace(2)* %ptr_x32, align 4, !invariant.load !0
+ %size_x = zext i32 %x32 to i64
+ ret i64 %size_x
+
+y:
+ %ptr_y = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i64 16
+ %ptr_y32 = bitcast i8 addrspace(2)* %ptr_y to i32 addrspace(2)*
+ %y32 = load i32, i32 addrspace(2)* %ptr_y32, align 4, !invariant.load !0
+ %size_y = zext i32 %y32 to i64
+ ret i64 %size_y
+
+z:
+ %ptr_z = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i64 20
+ %ptr_z32 = bitcast i8 addrspace(2)* %ptr_z to i32 addrspace(2)*
+ %z32 = load i32, i32 addrspace(2)* %ptr_z32, align 4, !invariant.load !0
+ %size_z = zext i32 %z32 to i64
+ ret i64 %size_z
+
+default:
+ ret i64 1
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { alwaysinline norecurse nounwind readonly }
+
+!0 = !{}
diff -Nru libclc-0.2.0+git20150813/amdgcn-amdhsa/lib/workitem/get_local_size.ll libclc-0.2.0+git20170213/amdgcn-amdhsa/lib/workitem/get_local_size.ll
--- libclc-0.2.0+git20150813/amdgcn-amdhsa/lib/workitem/get_local_size.ll 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn-amdhsa/lib/workitem/get_local_size.ll 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,38 @@
+declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+
+define i64 @get_local_size(i32 %dim) #1 {
+ %dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+ %dispatch_ptr_i32 = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
+ %xy_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i64 1
+ %xy_size = load i32, i32 addrspace(2)* %xy_size_ptr, align 4, !invariant.load !0
+ switch i32 %dim, label %default [
+ i32 0, label %x_dim
+ i32 1, label %y_dim
+ i32 2, label %z_dim
+ ]
+
+x_dim:
+ %x_size = and i32 %xy_size, 65535
+ %x_size.ext = zext i32 %x_size to i64
+ ret i64 %x_size.ext
+
+y_dim:
+ %y_size = lshr i32 %xy_size, 16
+ %y_size.ext = zext i32 %y_size to i64
+ ret i64 %y_size.ext
+
+z_dim:
+ %z_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i64 2
+ %z_size = load i32, i32 addrspace(2)* %z_size_ptr, align 4, !invariant.load !0, !range !1
+ %z_size.ext = zext i32 %z_size to i64
+ ret i64 %z_size.ext
+
+default:
+ ret i64 1
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { alwaysinline norecurse nounwind readonly }
+
+!0 = !{}
+!1 = !{ i32 0, i32 257 }
diff -Nru libclc-0.2.0+git20150813/amdgcn-amdhsa/lib/workitem/get_num_groups.cl libclc-0.2.0+git20170213/amdgcn-amdhsa/lib/workitem/get_num_groups.cl
--- libclc-0.2.0+git20150813/amdgcn-amdhsa/lib/workitem/get_num_groups.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgcn-amdhsa/lib/workitem/get_num_groups.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,12 @@
+
+#include
+
+_CLC_DEF size_t get_num_groups(uint dim) {
+ size_t global_size = get_global_size(dim);
+ size_t local_size = get_local_size(dim);
+ size_t num_groups = global_size / local_size;
+ if (global_size % local_size != 0) {
+ num_groups++;
+ }
+ return num_groups;
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/atomic/atomic.cl libclc-0.2.0+git20170213/amdgpu/lib/atomic/atomic.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/atomic/atomic.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/atomic/atomic.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,65 @@
+#include
+
+#define ATOMIC_FUNC_DEFINE(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE val) { \
+ return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)val); \
+}
+
+/* For atomic functions that don't need different bitcode dependending on argument signedness */
+#define ATOMIC_FUNC_SIGN(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+ _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE); \
+ ATOMIC_FUNC_DEFINE(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+ ATOMIC_FUNC_DEFINE(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE)
+
+#define ATOMIC_FUNC_ADDRSPACE(TYPE, FUNCTION) \
+ ATOMIC_FUNC_SIGN(TYPE, FUNCTION, global, 1) \
+ ATOMIC_FUNC_SIGN(TYPE, FUNCTION, local, 3)
+
+#define ATOMIC_FUNC(FUNCTION) \
+ ATOMIC_FUNC_ADDRSPACE(int, FUNCTION)
+
+#define ATOMIC_FUNC_DEFINE_3_ARG(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE cmp, RET_SIGN TYPE val) { \
+ return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)cmp, (ARG_SIGN TYPE)val); \
+}
+
+/* For atomic functions that don't need different bitcode dependending on argument signedness */
+#define ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+ _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE, signed TYPE); \
+ ATOMIC_FUNC_DEFINE_3_ARG(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
+ ATOMIC_FUNC_DEFINE_3_ARG(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE)
+
+#define ATOMIC_FUNC_ADDRSPACE_3_ARG(TYPE, FUNCTION) \
+ ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, global, 1) \
+ ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, local, 3)
+
+#define ATOMIC_FUNC_3_ARG(FUNCTION) \
+ ATOMIC_FUNC_ADDRSPACE_3_ARG(int, FUNCTION)
+
+ATOMIC_FUNC(atomic_add)
+ATOMIC_FUNC(atomic_and)
+ATOMIC_FUNC(atomic_or)
+ATOMIC_FUNC(atomic_sub)
+ATOMIC_FUNC(atomic_xchg)
+ATOMIC_FUNC(atomic_xor)
+ATOMIC_FUNC_3_ARG(atomic_cmpxchg)
+
+_CLC_DECL signed int __clc_atomic_max_addr1(volatile global signed int*, signed int);
+_CLC_DECL signed int __clc_atomic_max_addr3(volatile local signed int*, signed int);
+_CLC_DECL uint __clc_atomic_umax_addr1(volatile global uint*, uint);
+_CLC_DECL uint __clc_atomic_umax_addr3(volatile local uint*, uint);
+
+ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, global, 1)
+ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, local, 3)
+ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, global, 1)
+ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, local, 3)
+
+_CLC_DECL signed int __clc_atomic_min_addr1(volatile global signed int*, signed int);
+_CLC_DECL signed int __clc_atomic_min_addr3(volatile local signed int*, signed int);
+_CLC_DECL uint __clc_atomic_umin_addr1(volatile global uint*, uint);
+_CLC_DECL uint __clc_atomic_umin_addr3(volatile local uint*, uint);
+
+ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, global, 1)
+ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, local, 3)
+ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, global, 1)
+ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, local, 3)
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_attributes_impl.ll libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_attributes_impl.ll
--- libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_attributes_impl.ll 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_attributes_impl.ll 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,87 @@
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+
+declare i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)*) nounwind readnone
+declare i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)*) nounwind readnone
+
+declare [3 x i32] @llvm.OpenCL.image.get.size.2d(
+ %opencl.image2d_t addrspace(1)*) nounwind readnone
+declare [3 x i32] @llvm.OpenCL.image.get.size.3d(
+ %opencl.image3d_t addrspace(1)*) nounwind readnone
+
+declare [2 x i32] @llvm.OpenCL.image.get.format.2d(
+ %opencl.image2d_t addrspace(1)*) nounwind readnone
+declare [2 x i32] @llvm.OpenCL.image.get.format.3d(
+ %opencl.image3d_t addrspace(1)*) nounwind readnone
+
+define i32 @__clc_get_image_width_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d(
+ %opencl.image2d_t addrspace(1)* %img)
+ %2 = extractvalue [3 x i32] %1, 0
+ ret i32 %2
+}
+define i32 @__clc_get_image_width_3d(
+ %opencl.image3d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
+ %opencl.image3d_t addrspace(1)* %img)
+ %2 = extractvalue [3 x i32] %1, 0
+ ret i32 %2
+}
+
+define i32 @__clc_get_image_height_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d(
+ %opencl.image2d_t addrspace(1)* %img)
+ %2 = extractvalue [3 x i32] %1, 1
+ ret i32 %2
+}
+define i32 @__clc_get_image_height_3d(
+ %opencl.image3d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
+ %opencl.image3d_t addrspace(1)* %img)
+ %2 = extractvalue [3 x i32] %1, 1
+ ret i32 %2
+}
+
+define i32 @__clc_get_image_depth_3d(
+ %opencl.image3d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
+ %opencl.image3d_t addrspace(1)* %img)
+ %2 = extractvalue [3 x i32] %1, 2
+ ret i32 %2
+}
+
+define i32 @__clc_get_image_channel_data_type_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d(
+ %opencl.image2d_t addrspace(1)* %img)
+ %2 = extractvalue [2 x i32] %1, 0
+ ret i32 %2
+}
+define i32 @__clc_get_image_channel_data_type_3d(
+ %opencl.image3d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d(
+ %opencl.image3d_t addrspace(1)* %img)
+ %2 = extractvalue [2 x i32] %1, 0
+ ret i32 %2
+}
+
+define i32 @__clc_get_image_channel_order_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d(
+ %opencl.image2d_t addrspace(1)* %img)
+ %2 = extractvalue [2 x i32] %1, 1
+ ret i32 %2
+}
+define i32 @__clc_get_image_channel_order_3d(
+ %opencl.image3d_t addrspace(1)* nocapture %img) #0 {
+ %1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d(
+ %opencl.image3d_t addrspace(1)* %img)
+ %2 = extractvalue [2 x i32] %1, 1
+ ret i32 %2
+}
+
+attributes #0 = { nounwind readnone alwaysinline }
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_channel_data_type.cl libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_channel_data_type.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_channel_data_type.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_channel_data_type.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,13 @@
+#include
+
+_CLC_DECL int __clc_get_image_channel_data_type_2d(image2d_t);
+_CLC_DECL int __clc_get_image_channel_data_type_3d(image3d_t);
+
+_CLC_OVERLOAD _CLC_DEF int
+get_image_channel_data_type(image2d_t image) {
+ return __clc_get_image_channel_data_type_2d(image);
+}
+_CLC_OVERLOAD _CLC_DEF int
+get_image_channel_data_type(image3d_t image) {
+ return __clc_get_image_channel_data_type_3d(image);
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_channel_order.cl libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_channel_order.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_channel_order.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_channel_order.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,13 @@
+#include
+
+_CLC_DECL int __clc_get_image_channel_order_2d(image2d_t);
+_CLC_DECL int __clc_get_image_channel_order_3d(image3d_t);
+
+_CLC_OVERLOAD _CLC_DEF int
+get_image_channel_order(image2d_t image) {
+ return __clc_get_image_channel_order_2d(image);
+}
+_CLC_OVERLOAD _CLC_DEF int
+get_image_channel_order(image3d_t image) {
+ return __clc_get_image_channel_order_3d(image);
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_depth.cl libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_depth.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_depth.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_depth.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,8 @@
+#include
+
+_CLC_DECL int __clc_get_image_depth_3d(image3d_t);
+
+_CLC_OVERLOAD _CLC_DEF int
+get_image_depth(image3d_t image) {
+ return __clc_get_image_depth_3d(image);
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_height.cl libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_height.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_height.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_height.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,13 @@
+#include
+
+_CLC_DECL int __clc_get_image_height_2d(image2d_t);
+_CLC_DECL int __clc_get_image_height_3d(image3d_t);
+
+_CLC_OVERLOAD _CLC_DEF int
+get_image_height(image2d_t image) {
+ return __clc_get_image_height_2d(image);
+}
+_CLC_OVERLOAD _CLC_DEF int
+get_image_height(image3d_t image) {
+ return __clc_get_image_height_3d(image);
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_width.cl libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_width.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/image/get_image_width.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/get_image_width.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,13 @@
+#include
+
+_CLC_DECL int __clc_get_image_width_2d(image2d_t);
+_CLC_DECL int __clc_get_image_width_3d(image3d_t);
+
+_CLC_OVERLOAD _CLC_DEF int
+get_image_width(image2d_t image) {
+ return __clc_get_image_width_2d(image);
+}
+_CLC_OVERLOAD _CLC_DEF int
+get_image_width(image3d_t image) {
+ return __clc_get_image_width_3d(image);
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/read_imagef.cl libclc-0.2.0+git20170213/amdgpu/lib/image/read_imagef.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/image/read_imagef.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/read_imagef.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,14 @@
+#include
+
+_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
+
+_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler,
+ int2 coord) {
+ float2 coord_float = (float2)(coord.x, coord.y);
+ return __clc_read_imagef_tex(image, sampler, coord_float);
+}
+
+_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler,
+ float2 coord) {
+ return __clc_read_imagef_tex(image, sampler, coord);
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/read_imagei.cl libclc-0.2.0+git20170213/amdgpu/lib/image/read_imagei.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/image/read_imagei.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/read_imagei.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,23 @@
+#include
+
+_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
+
+int4 __clc_reinterpret_v4f_to_v4i(float4 v) {
+ union {
+ int4 v4i;
+ float4 v4f;
+ } res = { .v4f = v};
+ return res.v4i;
+}
+
+_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler,
+ int2 coord) {
+ float2 coord_float = (float2)(coord.x, coord.y);
+ return __clc_reinterpret_v4f_to_v4i(
+ __clc_read_imagef_tex(image, sampler, coord_float));
+}
+_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler,
+ float2 coord) {
+ return __clc_reinterpret_v4f_to_v4i(
+ __clc_read_imagef_tex(image, sampler, coord));
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/read_image_impl.ll libclc-0.2.0+git20170213/amdgpu/lib/image/read_image_impl.ll
--- libclc-0.2.0+git20150813/amdgpu/lib/image/read_image_impl.ll 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/read_image_impl.ll 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,46 @@
+%opencl.image2d_t = type opaque
+
+declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32,
+ i32, i32, i32) readnone
+declare i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)*) nounwind readnone
+declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) readnone
+
+define <4 x float> @__clc_v4f_from_v2f(<2 x float> %v) alwaysinline {
+ %e0 = extractelement <2 x float> %v, i32 0
+ %e1 = extractelement <2 x float> %v, i32 1
+ %res.0 = insertelement <4 x float> undef, float %e0, i32 0
+ %res.1 = insertelement <4 x float> %res.0, float %e1, i32 1
+ %res.2 = insertelement <4 x float> %res.1, float 0.0, i32 2
+ %res.3 = insertelement <4 x float> %res.2, float 0.0, i32 3
+ ret <4 x float> %res.3
+}
+
+define <4 x float> @__clc_read_imagef_tex(
+ %opencl.image2d_t addrspace(1)* nocapture %img,
+ i32 %sampler, <2 x float> %coord) alwaysinline {
+entry:
+ %coord_v4 = call <4 x float> @__clc_v4f_from_v2f(<2 x float> %coord)
+ %smp_id = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %sampler)
+ %img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %img)
+ %tex_id = add i32 %img_id, 2 ; First 2 IDs are reserved.
+
+ %coord_norm = and i32 %sampler, 1
+ %is_norm = icmp eq i32 %coord_norm, 1
+ br i1 %is_norm, label %NormCoord, label %UnnormCoord
+NormCoord:
+ %data.norm = call <4 x float> @llvm.R600.tex(
+ <4 x float> %coord_v4,
+ i32 0, i32 0, i32 0, ; Offset.
+ i32 2, i32 %smp_id,
+ i32 1, i32 1, i32 1, i32 1) ; Normalized coords.
+ ret <4 x float> %data.norm
+UnnormCoord:
+ %data.unnorm = call <4 x float> @llvm.R600.tex(
+ <4 x float> %coord_v4,
+ i32 0, i32 0, i32 0, ; Offset.
+ i32 %tex_id, i32 %smp_id,
+ i32 0, i32 0, i32 0, i32 0) ; Unnormalized coords.
+ ret <4 x float> %data.unnorm
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/read_imageui.cl libclc-0.2.0+git20170213/amdgpu/lib/image/read_imageui.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/image/read_imageui.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/read_imageui.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,23 @@
+#include
+
+_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
+
+uint4 __clc_reinterpret_v4f_to_v4ui(float4 v) {
+ union {
+ uint4 v4ui;
+ float4 v4f;
+ } res = { .v4f = v};
+ return res.v4ui;
+}
+
+_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler,
+ int2 coord) {
+ float2 coord_float = (float2)(coord.x, coord.y);
+ return __clc_reinterpret_v4f_to_v4ui(
+ __clc_read_imagef_tex(image, sampler, coord_float));
+}
+_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler,
+ float2 coord) {
+ return __clc_reinterpret_v4f_to_v4ui(
+ __clc_read_imagef_tex(image, sampler, coord));
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/write_imagef.cl libclc-0.2.0+git20170213/amdgpu/lib/image/write_imagef.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/image/write_imagef.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/write_imagef.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,9 @@
+#include
+
+_CLC_DECL void __clc_write_imagef_2d(image2d_t image, int2 coord, float4 color);
+
+_CLC_OVERLOAD _CLC_DEF void
+write_imagef(image2d_t image, int2 coord, float4 color)
+{
+ __clc_write_imagef_2d(image, coord, color);
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/write_imagei.cl libclc-0.2.0+git20170213/amdgpu/lib/image/write_imagei.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/image/write_imagei.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/write_imagei.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,9 @@
+#include
+
+_CLC_DECL void __clc_write_imagei_2d(image2d_t image, int2 coord, int4 color);
+
+_CLC_OVERLOAD _CLC_DEF void
+write_imagei(image2d_t image, int2 coord, int4 color)
+{
+ __clc_write_imagei_2d(image, coord, color);
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/write_image_impl.ll libclc-0.2.0+git20170213/amdgpu/lib/image/write_image_impl.ll
--- libclc-0.2.0+git20150813/amdgpu/lib/image/write_image_impl.ll 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/write_image_impl.ll 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,52 @@
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+
+declare i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)*) nounwind readnone
+declare i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)*) nounwind readnone
+
+declare void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord, i32 %rat_id)
+
+define void @__clc_write_imageui_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img,
+ <2 x i32> %coord, <4 x i32> %color) #0 {
+
+ ; Coordinate int2 -> int4.
+ %e0 = extractelement <2 x i32> %coord, i32 0
+ %e1 = extractelement <2 x i32> %coord, i32 1
+ %coord.0 = insertelement <4 x i32> undef, i32 %e0, i32 0
+ %coord.1 = insertelement <4 x i32> %coord.0, i32 %e1, i32 1
+ %coord.2 = insertelement <4 x i32> %coord.1, i32 0, i32 2
+ %coord.3 = insertelement <4 x i32> %coord.2, i32 0, i32 3
+
+ ; Get RAT ID.
+ %img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %img)
+ %rat_id = add i32 %img_id, 1
+
+ ; Call store intrinsic.
+ call void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord.3, i32 %rat_id)
+ ret void
+}
+
+define void @__clc_write_imagei_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img,
+ <2 x i32> %coord, <4 x i32> %color) #0 {
+ call void @__clc_write_imageui_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img,
+ <2 x i32> %coord, <4 x i32> %color)
+ ret void
+}
+
+define void @__clc_write_imagef_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img,
+ <2 x i32> %coord, <4 x float> %color) #0 {
+ %color.i32 = bitcast <4 x float> %color to <4 x i32>
+ call void @__clc_write_imageui_2d(
+ %opencl.image2d_t addrspace(1)* nocapture %img,
+ <2 x i32> %coord, <4 x i32> %color.i32)
+ ret void
+}
+
+attributes #0 = { alwaysinline }
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/image/write_imageui.cl libclc-0.2.0+git20170213/amdgpu/lib/image/write_imageui.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/image/write_imageui.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/image/write_imageui.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,9 @@
+#include
+
+_CLC_DECL void __clc_write_imageui_2d(image2d_t image, int2 coord, uint4 color);
+
+_CLC_OVERLOAD _CLC_DEF void
+write_imageui(image2d_t image, int2 coord, uint4 color)
+{
+ __clc_write_imageui_2d(image, coord, color);
+}
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/math/nextafter.cl libclc-0.2.0+git20170213/amdgpu/lib/math/nextafter.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/math/nextafter.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/math/nextafter.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,9 @@
+#include
+#include "../lib/clcmacro.h"
+
+_CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __clc_nextafter, double, double)
+#endif
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/math/sqrt.cl libclc-0.2.0+git20170213/amdgpu/lib/math/sqrt.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/math/sqrt.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/math/sqrt.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include
+#include "../../../generic/lib/clcmacro.h"
+#include "math/clc_sqrt.h"
+
+_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#ifdef __AMDGCN__
+ #define __clc_builtin_rsq __builtin_amdgcn_rsq
+#else
+ #define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
+#endif
+
+_CLC_OVERLOAD _CLC_DEF double sqrt(double x) {
+
+ uint vcc = x < 0x1p-767;
+ uint exp0 = vcc ? 0x100 : 0;
+ unsigned exp1 = vcc ? 0xffffff80 : 0;
+
+ double v01 = ldexp(x, exp0);
+ double v23 = __clc_builtin_rsq(v01);
+ double v45 = v01 * v23;
+ v23 = v23 * 0.5;
+
+ double v67 = fma(-v23, v45, 0.5);
+ v45 = fma(v45, v67, v45);
+ double v89 = fma(-v45, v45, v01);
+ v23 = fma(v23, v67, v23);
+ v45 = fma(v89, v23, v45);
+ v67 = fma(-v45, v45, v01);
+ v23 = fma(v67, v23, v45);
+
+ v23 = ldexp(v23, exp1);
+ return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double);
+
+#endif
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/OVERRIDES libclc-0.2.0+git20170213/amdgpu/lib/OVERRIDES
--- libclc-0.2.0+git20150813/amdgpu/lib/OVERRIDES 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/OVERRIDES 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,2 @@
+workitem/get_group_id.cl
+workitem/get_global_size.cl
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/SOURCES libclc-0.2.0+git20170213/amdgpu/lib/SOURCES
--- libclc-0.2.0+git20150813/amdgpu/lib/SOURCES 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/SOURCES 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,18 @@
+atomic/atomic.cl
+math/nextafter.cl
+math/sqrt.cl
+synchronization/barrier.cl
+image/get_image_width.cl
+image/get_image_height.cl
+image/get_image_depth.cl
+image/get_image_channel_data_type.cl
+image/get_image_channel_order.cl
+image/get_image_attributes_impl.ll
+image/read_imagef.cl
+image/read_imagei.cl
+image/read_imageui.cl
+image/read_image_impl.ll
+image/write_imagef.cl
+image/write_imagei.cl
+image/write_imageui.cl
+image/write_image_impl.ll
diff -Nru libclc-0.2.0+git20150813/amdgpu/lib/synchronization/barrier.cl libclc-0.2.0+git20170213/amdgpu/lib/synchronization/barrier.cl
--- libclc-0.2.0+git20150813/amdgpu/lib/synchronization/barrier.cl 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/amdgpu/lib/synchronization/barrier.cl 2017-02-12 21:33:49.000000000 +0000
@@ -0,0 +1,10 @@
+
+#include
+
+_CLC_DEF int __clc_clk_local_mem_fence() {
+ return CLK_LOCAL_MEM_FENCE;
+}
+
+_CLC_DEF int __clc_clk_global_mem_fence() {
+ return CLK_GLOBAL_MEM_FENCE;
+}
diff -Nru libclc-0.2.0+git20150813/configure.py libclc-0.2.0+git20170213/configure.py
--- libclc-0.2.0+git20150813/configure.py 2015-08-13 23:43:12.000000000 +0000
+++ libclc-0.2.0+git20170213/configure.py 2017-02-12 21:33:49.000000000 +0000
@@ -69,8 +69,8 @@
llvm_int_version = int(llvm_version[0]) * 100 + int(llvm_version[1]) * 10
llvm_string_version = 'LLVM' + llvm_version[0] + '.' + llvm_version[1]
-if llvm_int_version < 370:
- print "libclc requires LLVM >= 3.7"
+if llvm_int_version < 400:
+ print "libclc requires LLVM >= 4.0"
sys.exit(1)
llvm_system_libs = llvm_config(['--system-libs'])
@@ -92,18 +92,22 @@
available_targets = {
'r600--' : { 'devices' :
[{'gpu' : 'cedar', 'aliases' : ['palm', 'sumo', 'sumo2', 'redwood', 'juniper']},
- {'gpu' : 'cypress', 'aliases' : ['hemlock']},
- {'gpu' : 'barts', 'aliases' : ['turks', 'caicos']},
- {'gpu' : 'cayman', 'aliases' : ['aruba']}]},
+ {'gpu' : 'cypress', 'aliases' : ['hemlock'] },
+ {'gpu' : 'barts', 'aliases' : ['turks', 'caicos'] },
+ {'gpu' : 'cayman', 'aliases' : ['aruba']} ]},
'amdgcn--': { 'devices' :
- [{'gpu' : 'tahiti', 'aliases' : ['pitcairn', 'verde', 'oland', 'hainan', 'bonaire', 'kabini', 'kaveri', 'hawaii','mullins']}]},
- 'nvptx--' : { 'devices' : [{'gpu' : '', 'aliases' : []}]},
- 'nvptx64--' : { 'devices' : [{'gpu' : '', 'aliases' : []}] },
- 'nvptx--nvidiacl' : { 'devices' : [{'gpu' : '', 'aliases' : []}] },
- 'nvptx64--nvidiacl' : { 'devices' : [{'gpu' : '', 'aliases' : []}] }
+ [{'gpu' : 'tahiti', 'aliases' : ['pitcairn', 'verde', 'oland', 'hainan', 'bonaire', 'kabini', 'kaveri', 'hawaii','mullins','tonga','carrizo','iceland','fiji','stoney','polaris10','polaris11']} ]},
+ 'amdgcn--amdhsa': { 'devices' :
+ [{'gpu' : '', 'aliases' : ['bonaire', 'hawaii', 'kabini', 'kaveri', 'mullins', 'carrizo', 'stoney', 'fiji', 'iceland', 'tonga','polaris10','polaris11']} ]},
+ 'nvptx--' : { 'devices' : [{'gpu' : '', 'aliases' : []} ]},
+ 'nvptx64--' : { 'devices' : [{'gpu' : '', 'aliases' : []} ]},
+ 'nvptx--nvidiacl' : { 'devices' : [{'gpu' : '', 'aliases' : []} ]},
+ 'nvptx64--nvidiacl' : { 'devices' : [{'gpu' : '', 'aliases' : []} ]},
}
-default_targets = ['nvptx--nvidiacl', 'nvptx64--nvidiacl', 'r600--', 'amdgcn--']
+available_targets['amdgcn-mesa-mesa3d'] = available_targets['amdgcn--']
+
+default_targets = ['nvptx--nvidiacl', 'nvptx64--nvidiacl', 'r600--', 'amdgcn--', 'amdgcn--amdhsa', 'amdgcn-mesa-mesa3d']
targets = args
if not targets:
@@ -165,9 +169,11 @@
for arch in archs:
subdirs.append("%s-%s-%s" % (arch, t_vendor, t_os))
subdirs.append("%s-%s" % (arch, t_os))
+ if t_os == 'mesa3d':
+ subdirs.append('amdgcn-amdhsa')
subdirs.append(arch)
- if arch == 'amdgcn':
- subdirs.append('r600')
+ if arch == 'amdgcn' or arch == 'r600':
+ subdirs.append('amdgpu')
incdirs = filter(os.path.isdir,
[os.path.join(srcdir, subdir, 'include') for subdir in subdirs])
@@ -180,9 +186,6 @@
# The rule for building a .bc file for the specified architecture using clang.
clang_bc_flags = "-target %s -I`dirname $in` %s " \
"-fno-builtin " \
- "-Dcl_clang_storage_class_specifiers " \
- "-Dcl_khr_fp64 " \
- "-Dcles_khr_int64 " \
"-D__CLC_INTERNAL " \
"-emit-llvm" % (target, clang_cl_includes)
if device['gpu'] != '':
diff -Nru libclc-0.2.0+git20150813/debian/changelog libclc-0.2.0+git20170213/debian/changelog
--- libclc-0.2.0+git20150813/debian/changelog 2015-09-27 19:55:56.000000000 +0000
+++ libclc-0.2.0+git20170213/debian/changelog 2017-06-08 11:11:13.000000000 +0000
@@ -1,3 +1,55 @@
+libclc (0.2.0+git20170213-1~16.04.1) xenial; urgency=medium
+
+ * Backport to xenial. (LP: #1687981)
+ * Don't use debhelper 10.
+
+ -- Timo Aaltonen Fri, 24 Mar 2017 10:11:06 +0200
+
+libclc (0.2.0+git20170213-1) experimental; urgency=medium
+
+ [ Andreas Boll ]
+ * Simplify clang version updates even more.
+
+ [ Timo Aaltonen ]
+ * New upstream snapshot.
+ * clang: Bump clang version to 4.0.
+
+ -- Timo Aaltonen Mon, 13 Feb 2017 15:08:23 +0200
+
+libclc (0.2.0+git20160907-3) unstable; urgency=medium
+
+ * Simplify clang version updates.
+ * Drop de-duplication of files that aren't duplicate any more.
+
+ -- Michael Gilbert Sat, 26 Nov 2016 03:35:48 +0000
+
+libclc (0.2.0+git20160907-2) unstable; urgency=medium
+
+ [ Andreas Boll ]
+ * Declare Multi-Arch: foreign for all packages (closes: #845314).
+
+ [ Michael Gilbert ]
+ * Update to debhelper 10.
+
+ -- Michael Gilbert Sat, 26 Nov 2016 02:35:37 +0000
+
+libclc (0.2.0+git20160907-1) experimental; urgency=medium
+
+ * New upstream snapshot (closes: #836960).
+ * Build with clang 3.9.
+ * Drop devices.patch, upstream.
+ * Use https for Vcs-Git field.
+
+ -- Timo Aaltonen Fri, 16 Sep 2016 09:20:06 +0300
+
+libclc (0.2.0+git20150813-3) unstable; urgency=medium
+
+ * Bump standards version.
+ * Build with clang 3.8 (closes: #832014).
+ * Add support for additional GPU devices (closes: #823677).
+
+ -- Michael Gilbert Sat, 30 Jul 2016 22:47:05 +0000
+
libclc (0.2.0+git20150813-2) unstable; urgency=medium
* Enable build hardening flags.
diff -Nru libclc-0.2.0+git20150813/debian/clang libclc-0.2.0+git20170213/debian/clang
--- libclc-0.2.0+git20150813/debian/clang 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/debian/clang 2017-06-08 10:49:21.000000000 +0000
@@ -0,0 +1 @@
+4.0
diff -Nru libclc-0.2.0+git20150813/debian/clean libclc-0.2.0+git20170213/debian/clean
--- libclc-0.2.0+git20150813/debian/clean 2015-09-27 18:36:38.000000000 +0000
+++ libclc-0.2.0+git20170213/debian/clean 2017-02-13 13:00:28.000000000 +0000
@@ -1,3 +1,4 @@
Makefile
libclc.pc
build/*.pyc
+utils/prepare-builtins.dwo
diff -Nru libclc-0.2.0+git20150813/debian/control libclc-0.2.0+git20170213/debian/control
--- libclc-0.2.0+git20150813/debian/control 2015-09-27 18:38:18.000000000 +0000
+++ libclc-0.2.0+git20170213/debian/control 2017-06-08 11:11:33.000000000 +0000
@@ -7,21 +7,22 @@
Build-Depends:
debhelper (>= 9),
python,
- clang-3.7,
- llvm-3.7-dev (>= 3.7),
+ clang-4.0,
+ llvm-4.0-dev,
zlib1g-dev,
libedit-dev,
-Standards-Version: 3.9.6
+Standards-Version: 3.9.8
Homepage: http://libclc.llvm.org
-Vcs-Git: git://anonscm.debian.org/pkg-opencl/libclc.git
+Vcs-Git: https://anonscm.debian.org/git/pkg-opencl/libclc.git
Vcs-Browser: https://anonscm.debian.org/cgit/pkg-opencl/libclc.git
Package: libclc-ptx
Architecture: all
+Multi-Arch: foreign
Depends:
${misc:Depends},
libclc-dev (= ${binary:Version}),
- libclang-common-3.7-dev,
+ libclang-common-4.0-dev,
Description: OpenCL C language implementation - ptx support
libclc is an open implementation of the OpenCL C programming language,
as specified by the OpenCL 1.1 Specification.
@@ -30,10 +31,11 @@
Package: libclc-amdgcn
Architecture: all
+Multi-Arch: foreign
Depends:
${misc:Depends},
libclc-dev (= ${binary:Version}),
- libclang-common-3.7-dev,
+ libclang-common-4.0-dev,
Description: OpenCL C language implementation - amdgcn support
libclc is an open implementation of the OpenCL C programming language,
as specified by the OpenCL 1.1 Specification.
@@ -43,10 +45,11 @@
Package: libclc-r600
Architecture: all
+Multi-Arch: foreign
Depends:
${misc:Depends},
libclc-dev (= ${binary:Version}),
- libclang-common-3.7-dev,
+ libclang-common-4.0-dev,
Description: OpenCL C language implementation - r600 support
libclc is an open implementation of the OpenCL C programming language,
as specified by the OpenCL 1.1 Specification.
@@ -57,6 +60,7 @@
Package: libclc-dev
Section: libdevel
Architecture: all
+Multi-Arch: foreign
Depends:
${misc:Depends},
Description: OpenCL C language implementation - development files
diff -Nru libclc-0.2.0+git20150813/debian/control.in libclc-0.2.0+git20170213/debian/control.in
--- libclc-0.2.0+git20150813/debian/control.in 1970-01-01 00:00:00.000000000 +0000
+++ libclc-0.2.0+git20170213/debian/control.in 2017-06-08 10:49:21.000000000 +0000
@@ -0,0 +1,70 @@
+Source: libclc
+Section: libs
+Priority: extra
+Maintainer: Debian OpenCL team
+Uploaders:
+ Michael Gilbert ,
+Build-Depends:
+ debhelper (>= 9),
+ python,
+ clang-CLANG_VERSION,
+ llvm-CLANG_VERSION-dev,
+ zlib1g-dev,
+ libedit-dev,
+Standards-Version: 3.9.8
+Homepage: http://libclc.llvm.org
+Vcs-Git: https://anonscm.debian.org/git/pkg-opencl/libclc.git
+Vcs-Browser: https://anonscm.debian.org/cgit/pkg-opencl/libclc.git
+
+Package: libclc-ptx
+Architecture: all
+Multi-Arch: foreign
+Depends:
+ ${misc:Depends},
+ libclc-dev (= ${binary:Version}),
+ libclang-common-CLANG_VERSION-dev,
+Description: OpenCL C language implementation - ptx support
+ libclc is an open implementation of the OpenCL C programming language,
+ as specified by the OpenCL 1.1 Specification.
+ .
+ This package contains support for the PTX platform.
+
+Package: libclc-amdgcn
+Architecture: all
+Multi-Arch: foreign
+Depends:
+ ${misc:Depends},
+ libclc-dev (= ${binary:Version}),
+ libclang-common-CLANG_VERSION-dev,
+Description: OpenCL C language implementation - amdgcn support
+ libclc is an open implementation of the OpenCL C programming language,
+ as specified by the OpenCL 1.1 Specification.
+ .
+ This package contains support for the amdgcn (AMD GPU) platform.
+ Supported GPU families: Southern Islands and newer.
+
+Package: libclc-r600
+Architecture: all
+Multi-Arch: foreign
+Depends:
+ ${misc:Depends},
+ libclc-dev (= ${binary:Version}),
+ libclang-common-CLANG_VERSION-dev,
+Description: OpenCL C language implementation - r600 support
+ libclc is an open implementation of the OpenCL C programming language,
+ as specified by the OpenCL 1.1 Specification.
+ .
+ This package contains support for the r600 (AMD GPU) platform.
+ Supported GPU families: Evergreen and Northern Islands.
+
+Package: libclc-dev
+Section: libdevel
+Architecture: all
+Multi-Arch: foreign
+Depends:
+ ${misc:Depends},
+Description: OpenCL C language implementation - development files
+ libclc is an open implementation of the OpenCL C programming language,
+ as specified by the OpenCL 1.1 Specification.
+ .
+ This package contains development header files.
diff -Nru libclc-0.2.0+git20150813/debian/copyright libclc-0.2.0+git20170213/debian/copyright
--- libclc-0.2.0+git20150813/debian/copyright 2015-09-27 18:36:38.000000000 +0000
+++ libclc-0.2.0+git20170213/debian/copyright 2017-02-13 13:00:28.000000000 +0000
@@ -9,7 +9,7 @@
Files: debian/*
Copyright:
- 2013-2015 Michael Gilbert
+ 2013-2016 Michael Gilbert
2013-2014 Julian Wollrath
License: NCSA or MIT
diff -Nru libclc-0.2.0+git20150813/debian/README.source libclc-0.2.0+git20170213/debian/README.source
--- libclc-0.2.0+git20150813/debian/README.source 2015-09-27 18:36:38.000000000 +0000
+++ libclc-0.2.0+git20170213/debian/README.source 2017-02-13 13:00:28.000000000 +0000
@@ -1,2 +1,12 @@
+Watch File
+==========
There is no watch file in debian/ because upstream uses git but has made
no tags, so there is currently no way to watch upstream changes.
+
+Clang Updates
+=============
+To build with a different version of clang, just alter the clang version
+number contained in the debian/clang file, then update the control file
+with:
+
+$ ./debian/rules debian/control
diff -Nru libclc-0.2.0+git20150813/debian/rules libclc-0.2.0+git20170213/debian/rules
--- libclc-0.2.0+git20150813/debian/rules 2015-09-27 20:01:01.000000000 +0000
+++ libclc-0.2.0+git20170213/debian/rules 2017-06-08 10:49:21.000000000 +0000
@@ -6,26 +6,13 @@
export DEB_BUILD_MAINT_OPTIONS=hardening=+all
-confflags=--prefix=/usr \
- --with-llvm-config=/usr/bin/llvm-config-3.7 \
+LLVM_CONFIG=/usr/bin/llvm-config-$(shell cat debian/clang)
-path=debian/tmp/usr/lib/clc
+debian/control: debian/control.in
+ sed "s/CLANG_VERSION/$(shell cat debian/clang)/g" < $< > $@
-%:
+%: debian/control
dh $@ --parallel
override_dh_auto_configure:
- ./configure.py $(confflags)
-
-override_dh_install:
- test $(shell sha512sum $(path)/cypress-r600--.bc) != $(shell sha512sum $(path)/cayman-r600--.bc) || \
- rm -f $(path)/cypress-r600--.bc && \
- ln -s cayman-r600--.bc $(path)/cypress-r600--.bc
- test $(shell sha512sum $(path)/cedar-r600--.bc) != $(shell sha512sum $(path)/barts-r600--.bc) || \
- rm -f $(path)/cedar-r600--.bc && \
- ln -s barts-r600--.bc $(path)/cedar-r600--.bc
- dh_install
-
-override_dh_clean:
- dh_clean
- find -name '*.d' -execdir rm -f {} \;
+ ./configure.py --prefix=/usr --with-llvm-config=$(LLVM_CONFIG)
diff -Nru libclc-0.2.0+git20150813/generic/include/clc/clc.h libclc-0.2.0+git20170213/generic/include/clc/clc.h
--- libclc-0.2.0+git20150813/generic/include/clc/clc.h 2015-08-13 23:43:12.000000000 +0000
+++ libclc-0.2.0+git20170213/generic/include/clc/clc.h 2017-02-12 21:33:49.000000000 +0000
@@ -30,6 +30,7 @@
#include
#include
#include
+#include
/* 6.11.2 Math Functions */
#include
@@ -43,30 +44,41 @@
#include
#include
#include
+#include
#include
#include
+#include
#include
#include
+#include
#include
#include
+#include
#include
#include
#include
+#include
#include
#include
#include
#include
#include
#include
+#include
#include
#include
#include
+#include
#include
+#include
+#include
#include
#include
#include
#include
+#include
#include
+#include
#include
#include
#include
@@ -77,6 +89,8 @@
#include
#include
#include
+#include
+#include
#include
#include
#include
@@ -88,6 +102,7 @@
#include
#include
#include
+#include
#include
/* 6.11.2.1 Floating-point macros */
@@ -210,6 +225,11 @@
#include
#include
+/* 6.11.13 Image Read and Write Functions */
+
+#include
+#include
+
/* libclc internal defintions */
#ifdef __CLC_INTERNAL
#include
-libclc currently only supports the PTX target, but support for more
-targets is welcome.
+libclc currently supports the AMDGCN, and R600 and NVPTX targets, but
+support for more targets is welcome.
Download
@@ -49,7 +49,7 @@
Mailing List
-libclc-dev@pcc.me.uk (subscribe/unsubscribe, archives)
+libclc-dev@lists.llvm.org (subscribe/unsubscribe, archives)