Skip to content

Instantly share code, notes, and snippets.

@BeMg
Last active May 29, 2020 03:11
Show Gist options
  • Save BeMg/622f4b23e2f4d55ccae5644c4db09265 to your computer and use it in GitHub Desktop.
Save BeMg/622f4b23e2f4d55ccae5644c4db09265 to your computer and use it in GitHub Desktop.
aaa
/*
 * Conv1D: each work-item adds the N products array[idx+i] * filter[i]
 * (i = 0 .. N-1) into output[idx], where idx is the work-item's global id
 * in dimension 0.
 *
 * array  - input values; element idx+i is read for every i in [0, N)
 * filter - filter values; elements 0 .. N-1 are read
 * N      - loop trip count; the loop body is skipped when N <= 0
 * output - updated with +=, never plainly assigned, so the value it holds
 *          on entry is part of the result
 *
 * NOTE(review): no bounds are checked here; presumably the host sizes
 * `array` to cover idx+N-1 and pre-zeroes `output` -- confirm against the
 * host code.
 */
__kernel void Conv1D(__global int *array, __global int *filter, int N, __global int *output) {
int idx = get_global_id(0);
for (int i=0; i<N; i++) {
output[idx] += array[idx+i] * filter[i];
/* Work-group barrier, executed once per loop iteration. */
barrier(CLK_GLOBAL_MEM_FENCE);
/* Per its own message, this call is here only for its side effect. */
printf("Juse for side-effect.\n");
}
}
; ModuleID = 'conv1d.bc'
source_filename = "conv1d.bc"
; NOTE(review): the datalayout declares 32-bit pointers (p:32:32) while the
; triple below names x86_64 -- confirm this pairing is intended.
target datalayout = "e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "x86_64-unknown-unknown-unknown"
; Table of 32 i32 values in address space 2, defined outside this module.
; @Conv1D loads entries 4, 10, 14 and 15 from it.
@kernel_config_l2 = external addrspace(2) constant [32 x i32]
; NUL-terminated message (21 characters + terminator) passed to @puts in @Conv1D.
@str = private unnamed_addr constant [22 x i8] c"Juse for side-effect.\00"
; Function Attrs: noduplicate
; External barrier routine taking one i32 argument; @Conv1D calls it with 0 and with 2.
declare protected void @barrier(i32) #0
; Function Attrs: nounwind
; External puts; @Conv1D calls it with the address of @str.
declare i32 @puts(i8* nocapture readonly) #1
; Function Attrs: noinline nounwind
; Conv1D with the kernel body wrapped in explicit counting loops. Each loop
; runs its counter up to %5 (= @kernel_config_l2[4]) and the function is
; split into regions at the @barrier calls.
; The value of the source loop variable i is kept per loop slot in an i32
; array reached through %.i.01.ex_phi.pocl_context.
;   %array, %filter - read-only i32 buffers in address space 1
;   %N              - trip count of the source loop; when N <= 0 none of the
;                     three buffers is accessed
;   %output         - i32 buffer in address space 1, read-modify-written
define void @Conv1D(i32 addrspace(1)* nocapture readonly %array, i32 addrspace(1)* nocapture readonly %filter, i32 %N, i32 addrspace(1)* nocapture %output) #2 {
.r_entry2:
; %4 = config[14] + __core_num() * config[15]; it is converted to the
; context-array pointer below.
%0 = call i32 @__core_num()
%1 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 15), align 4
%2 = mul i32 %0, %1
%3 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 14), align 4
%4 = add i32 %3, %2
; %5 = config[4]: the upper bound compared against by every counting loop in this function.
%5 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 4), align 4
; %6..%9 round (%4 + 4 * %5) up to a multiple of 8; %9 has no user in this function.
%6 = mul i32 4, %5
%7 = add i32 %4, %6
%8 = add i32 %7, 7
%9 = and i32 %8, -8
%.i.01.ex_phi.pocl_context = inttoptr i32 %4 to i32*, !ocl.restrict !3
call void @barrier(i32 0) #1
; %10 = config[10]: offset added to the loop counter to form the output index (%19).
%10 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 10), align 4, !tbaa !4
; Source loop guard: skip all buffer work when N <= 0.
%11 = icmp sgt i32 %N, 0
br i1 %11, label %pregion_for_init7, label %pregion_for_cond13.preheader
pregion_for_cond13.preheader: ; preds = %.r_entry2
br label %pregion_for_cond13
pregion_for_init7: ; preds = %.r_entry2
; Zero slot 0 of the context array (i = 0); the loop below zeroes slots 1 and up.
; NOTE(review): this store asserts align 64, but nothing visible here
; establishes the alignment of %4 -- confirm.
store i32 0, i32* %.i.01.ex_phi.pocl_context, align 64
br label %pregion_for_cond9
pregion_for_cond9: ; preds = %pregion_for_cond9, %pregion_for_init7
; Bottom-tested loop: slot 1 is written before %5 is compared, so slots 0
; and 1 are always zeroed even when %5 < 2.
; NOTE(review): confirm the context array always has room for slot 1.
%12 = phi i32 [ 1, %pregion_for_init7 ], [ %14, %pregion_for_cond9 ]
%13 = getelementptr i32, i32* %.i.01.ex_phi.pocl_context, i32 %12
store i32 0, i32* %13, align 4, !llvm.mem.parallel_loop_access !8
%14 = add i32 %12, 1
%15 = icmp slt i32 %14, %5
br i1 %15, label %pregion_for_cond9, label %.lr.ph.loopbarrier, !llvm.loop !9
.lr.ph.loopbarrier: ; preds = %pregion_for_cond9
call void @barrier(i32 0) #1
br label %pregion_for_cond6
pregion_for_cond6: ; preds = %pregion_for_cond6.backedge, %.lr.ph.loopbarrier
; One multiply-accumulate per slot %16:
;   i = context[%16], idx = %16 + %10,
;   output[idx] += array[idx + i] * filter[i]
%16 = phi i32 [ 0, %.lr.ph.loopbarrier ], [ %.be, %pregion_for_cond6.backedge ]
%17 = getelementptr i32, i32* %.i.01.ex_phi.pocl_context, i32 %16
%18 = load i32, i32* %17, align 4, !llvm.mem.parallel_loop_access !10
%19 = add i32 %16, %10
%20 = add nsw i32 %18, %19
%21 = getelementptr inbounds i32, i32 addrspace(1)* %array, i32 %20
%22 = load i32, i32 addrspace(1)* %21, align 4, !tbaa !4, !llvm.mem.parallel_loop_access !10
%23 = getelementptr inbounds i32, i32 addrspace(1)* %filter, i32 %18
%24 = load i32, i32 addrspace(1)* %23, align 4, !tbaa !4, !llvm.mem.parallel_loop_access !10
%25 = mul nsw i32 %24, %22
%26 = getelementptr inbounds i32, i32 addrspace(1)* %output, i32 %19
%27 = load i32, i32 addrspace(1)* %26, align 4, !tbaa !4, !llvm.mem.parallel_loop_access !10
%28 = add nsw i32 %27, %25
store i32 %28, i32 addrspace(1)* %26, align 4, !tbaa !4, !llvm.mem.parallel_loop_access !10
%29 = add i32 %16, 1
%30 = icmp slt i32 %29, %5
br i1 %30, label %pregion_for_cond6.backedge, label %pregion_for_init, !llvm.loop !11
pregion_for_init: ; preds = %pregion_for_cond6
; The only barrier call with argument 2. %31 = i + 1, computed from the i
; value (%18) loaded for the last slot processed above.
tail call void @barrier(i32 2) #3
%31 = add nuw nsw i32 %18, 1
br label %pregion_for_cond
pregion_for_cond: ; preds = %pregion_for_cond, %pregion_for_init
; For every slot: print the message with puts, then store the incremented i
; back into the context array.
%32 = phi i32 [ 0, %pregion_for_init ], [ %34, %pregion_for_cond ]
%puts = tail call i32 @puts(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @str, i32 0, i32 0)), !llvm.mem.parallel_loop_access !12
%33 = getelementptr i32, i32* %.i.01.ex_phi.pocl_context, i32 %32
store i32 %31, i32* %33, align 4, !llvm.mem.parallel_loop_access !12
%34 = add i32 %32, 1
%35 = icmp slt i32 %34, %5
br i1 %35, label %pregion_for_cond, label %.r_entry.brexitbarrier.latchbarrier, !llvm.loop !13
.r_entry.brexitbarrier.latchbarrier: ; preds = %pregion_for_cond
; Source loop condition (i + 1 < N): go back to the multiply-accumulate
; region starting at slot 0, or leave through exit.barrier3.
%36 = icmp slt i32 %31, %N
call void @barrier(i32 0) #1
br i1 %36, label %pregion_for_cond6.backedge, label %exit.barrier3
pregion_for_cond6.backedge: ; preds = %.r_entry.brexitbarrier.latchbarrier, %pregion_for_cond6
; %.be is the next slot index: %29 when continuing inside the region,
; 0 when starting the next iteration of the source loop.
%.be = phi i32 [ %29, %pregion_for_cond6 ], [ 0, %.r_entry.brexitbarrier.latchbarrier ]
br label %pregion_for_cond6
pregion_for_cond13: ; preds = %pregion_for_cond13, %pregion_for_cond13.preheader
; N <= 0 path: a loop that only advances its counter from 1 until it reaches %5.
%37 = phi i32 [ %38, %pregion_for_cond13 ], [ 1, %pregion_for_cond13.preheader ]
%38 = add i32 %37, 1
%39 = icmp slt i32 %38, %5
br i1 %39, label %pregion_for_cond13, label %exit.barrier, !llvm.loop !14
exit.barrier: ; preds = %pregion_for_cond13
call void @barrier(i32 0) #1
br label %UnifiedReturnBlock
exit.barrier3: ; preds = %.r_entry.brexitbarrier.latchbarrier
call void @barrier(i32 0) #1
br label %UnifiedReturnBlock
UnifiedReturnBlock: ; preds = %exit.barrier3, %exit.barrier
; Single return point shared by both exit paths.
ret void
}
; External function returning an i32; called once in the entry block of @Conv1D.
declare i32 @__core_num()
; Attribute groups: #0 is on the @barrier declaration, #1 on the @puts
; declaration and on the barrier(0) call sites, #2 on the @Conv1D
; definition, #3 on the barrier(2) call site.
attributes #0 = { noduplicate "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
attributes #2 = { noinline nounwind "_kernel_local_size"="0" "_wi_alloca_size"="4" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { noduplicate nounwind }
; Named metadata: producer string, restrict tag list, and the kernel list (which names @Conv1D).
!llvm.ident = !{!0}
!ocl.restrict = !{!1}
!opencl.kernels = !{!2}
!0 = !{!"clang version 3.6.0 (git://git.ti.com/opencl/clang.git 5b006f07bdc22b5ae6917eecdfe243908dd7b029) (git://git.ti.com/opencl/llvm.git 09780c6750b30da81e4a0a805aedf1699fbc37c7)"}
!1 = distinct !{!1}
!2 = !{void (i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*)* @Conv1D}
; !3 is the !ocl.restrict tag attached to the context-array pointer in @Conv1D.
!3 = distinct !{!3}
; !4..!7: TBAA description of "int" accesses.
!4 = !{!5, !5, i64 0}
!5 = !{!"int", !6, i64 0}
!6 = !{!"omnipotent char", !7, i64 0}
!7 = !{!"Simple C/C++ TBAA"}
; !9, !11, !13, !14 are the loop identifiers used as !llvm.loop in @Conv1D;
; !8, !10, !12 are the !llvm.mem.parallel_loop_access lists that refer to them.
!8 = !{!9}
!9 = distinct !{!9}
!10 = !{!11}
!11 = distinct !{!11}
!12 = !{!13}
!13 = distinct !{!13}
!14 = distinct !{!14}
; ModuleID = 'conv1d.bc'
source_filename = "conv1d.bc"
target datalayout = "e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "c6000-unknown-unknown-unknown"
; Table of 32 i32 values in address space 2, defined outside this module.
; @Conv1D loads entries 4 and 10 from it.
@kernel_config_l2 = external addrspace(2) constant [32 x i32]
; Function Attrs: nounwind
; Conv1D as a two-level loop nest: the outer loop walks %3 over
; [0, @kernel_config_l2[4]) and the inner loop walks i over [0, N).
; This function contains no call instructions, so neither a barrier nor a
; puts/printf call appears in this dump.
;   %array, %filter - read-only i32 buffers in address space 1
;   %N              - inner trip count; when N <= 0 no buffer is accessed
;   %output         - i32 buffer in address space 1, read-modify-written
define void @Conv1D(i32 addrspace(1)* nocapture readonly %array, i32 addrspace(1)* nocapture readonly %filter, i32 %N, i32 addrspace(1)* nocapture %output) #0 {
.entry:
; %0 = config[10] (offset added to the outer counter), %1 = config[4] (outer bound).
%0 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 10), align 4, !tbaa !7, !llvm.mem.parallel_loop_access !11
%1 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 4), align 4, !llvm.mem.parallel_loop_access !11
%2 = icmp sgt i32 %1, 0
br i1 %2, label %.bodyTop.preheader, label %.exit
.bodyTop.preheader: ; preds = %.entry
br label %.bodyTop
.bodyTop: ; preds = %.bodyEnd, %.bodyTop.preheader
; %4 = %3 + %0 indexes output and is the base index into array.
; The N > 0 test is evaluated on every outer iteration.
%3 = phi i32 [ %18, %.bodyEnd ], [ 0, %.bodyTop.preheader ]
%4 = add i32 %3, %0
%5 = icmp sgt i32 %N, 0
br i1 %5, label %.lr.ph, label %.bodyEnd
.lr.ph: ; preds = %.bodyTop
; Load output[%4] once before the inner loop; the running sum is then carried in %8.
%6 = getelementptr inbounds i32, i32 addrspace(1)* %output, i32 %4
%.pre = load i32, i32 addrspace(1)* %6, align 4, !tbaa !7, !llvm.mem.parallel_loop_access !11
br label %7
7: ; preds = %7, %.lr.ph
; Inner loop: %8 = running sum, %i.01 = i.
; sum += array[%4 + i] * filter[i], and the sum is stored to output[%4]
; on every iteration.
%8 = phi i32 [ %.pre, %.lr.ph ], [ %15, %7 ]
%i.01 = phi i32 [ 0, %.lr.ph ], [ %16, %7 ]
%9 = add nsw i32 %i.01, %4
%10 = getelementptr inbounds i32, i32 addrspace(1)* %array, i32 %9
%11 = load i32, i32 addrspace(1)* %10, align 4, !tbaa !7, !llvm.mem.parallel_loop_access !11
%12 = getelementptr inbounds i32, i32 addrspace(1)* %filter, i32 %i.01
%13 = load i32, i32 addrspace(1)* %12, align 4, !tbaa !7, !llvm.mem.parallel_loop_access !11
%14 = mul nsw i32 %13, %11
%15 = add nsw i32 %8, %14
store i32 %15, i32 addrspace(1)* %6, align 4, !tbaa !7, !llvm.mem.parallel_loop_access !11
%16 = add nuw nsw i32 %i.01, 1
%17 = icmp slt i32 %16, %N
br i1 %17, label %7, label %.bodyEnd.loopexit
.bodyEnd.loopexit: ; preds = %7
br label %.bodyEnd
.bodyEnd: ; preds = %.bodyEnd.loopexit, %.bodyTop
; Advance the outer counter and repeat while it is below %1.
%18 = add i32 %3, 1
%19 = icmp slt i32 %18, %1
br i1 %19, label %.bodyTop, label %.exit.loopexit, !llvm.loop !12
.exit.loopexit: ; preds = %.bodyEnd
br label %.exit
.exit: ; preds = %.exit.loopexit, %.entry
ret void
}
; Attribute group #0 is attached to the @Conv1D definition.
attributes #0 = { nounwind "_kernel_local_size"="0" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" }
; Named metadata: the kernel list and the producer string.
!opencl.kernels = !{!0}
!llvm.ident = !{!6}
; !0 is the kernel record for @Conv1D; !1..!5 give per-argument information
; for its four parameters.
!0 = !{void (i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*)* @Conv1D, !1, !2, !3, !4, !5}
!1 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 0, i32 1}
!2 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none"}
!3 = !{!"kernel_arg_type", !"int*", !"int*", !"int", !"int*"}
!4 = !{!"kernel_arg_base_type", !"int*", !"int*", !"int", !"int*"}
!5 = !{!"kernel_arg_type_qual", !"", !"", !"", !""}
!6 = !{!"clang version 3.6.0 (git://git.ti.com/opencl/clang.git 5b006f07bdc22b5ae6917eecdfe243908dd7b029) (git://git.ti.com/opencl/llvm.git 09780c6750b30da81e4a0a805aedf1699fbc37c7)"}
; !7..!10: TBAA description of "int" accesses.
!7 = !{!8, !8, i64 0}
!8 = !{!"int", !9, i64 0}
!9 = !{!"omnipotent char", !10, i64 0}
!10 = !{!"Simple C/C++ TBAA"}
; !12 is the loop identifier on the outer-loop branch in @Conv1D; !11 is the
; !llvm.mem.parallel_loop_access list that refers to it.
!11 = !{!12}
!12 = distinct !{!12}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment