Skip to content

Instantly share code, notes, and snippets.

@BeMg
Created April 9, 2020 05:09
Show Gist options
  • Save BeMg/744539492eb543dde35e49dd7c46f6ef to your computer and use it in GitHub Desktop.
TVM strided load/store example: a depthwise conv2d shown as lowered TVM IR (scalar and 8-lane vectorized schedules) and as the generated RISC-V vector LLVM IR.
// Scalar schedule: depthwise conv2d over 256 channels of a 96x96 plane with a
// 3x3 filter, after zero-padding 32 elements on each spatial edge
// (96 + 2*32 = 160). Buffer sizes: 256*160*160 = 6553600 padded floats,
// 256*96*96 = 2359296 output floats.
// attr [PaddedInput] storage_scope = "global"
allocate PaddedInput[float32 * 6553600]
// attr [DepthwiseConv2d] storage_scope = "global"
allocate DepthwiseConv2d[float32 * 2359296]
// Padding pass: copy Input into the interior [32,128) x [32,128) of each
// 160x160 plane, zero elsewhere; -3104 = -(32*96 + 32) maps padded
// coordinates back to the unpadded 96x96 Input plane.
produce PaddedInput {
for (i1, 0, 256) {
for (i2, 0, 160) {
for (i3, 0, 160) {
PaddedInput[((((i1*160) + i2)*160) + i3)] = tvm_if_then_else(((((32 <= i2) && (i2 < 128)) && (32 <= i3)) && (i3 < 128)), Input[(((((i1*96) + i2)*96) + i3) + -3104)], 0.000000f)
}
}
}
}
// 3x3 accumulation. NOTE(review): the PaddedInput index expands to
// c*25600 + di*5120 + i*160 + dj*32 + j, i.e. the filter taps step the
// padded plane by 5120 (= 32*160) and 32 elements rather than the adjacent
// 160 and 1 -- the strided-tap access pattern this gist demonstrates.
produce DepthwiseConv2d {
for (c, 0, 256) {
for (i, 0, 96) {
for (j, 0, 96) {
// Initialize the accumulator, then accumulate the nine taps in place.
DepthwiseConv2d[((((c*96) + i)*96) + j)] = 0.000000f
for (di, 0, 3) {
for (dj, 0, 3) {
DepthwiseConv2d[((((c*96) + i)*96) + j)] = (DepthwiseConv2d[((((c*96) + i)*96) + j)] + (PaddedInput[((((((((c*5) + di)*32) + i)*5) + dj)*32) + j)]*Filter[((((c*3) + di)*3) + dj)]))
}
}
}
}
}
}
// Vectorized schedule of the same conv: the 256-channel loop is split
// 32 (c.outer) x 8, and the 8 inner channels become vector lanes.
// ramp(base, stride, 8) denotes an 8-lane strided vector access; the lane
// strides are one whole plane/filter per lane: 9216 = 96*96 output elements,
// 25600 = 160*160 padded-input elements, 9 = one 3x3 filter.
// attr [PaddedInput] storage_scope = "global"
allocate PaddedInput[float32 * 6553600]
// attr [DepthwiseConv2d] storage_scope = "global"
// 294912 x float32x8 vectors = 2359296 scalar floats, same total as before.
allocate DepthwiseConv2d[float32x8 * 294912]
// Padding pass, identical to the scalar schedule above.
produce PaddedInput {
for (i1, 0, 256) {
for (i2, 0, 160) {
for (i3, 0, 160) {
PaddedInput[((((i1*160) + i2)*160) + i3)] = tvm_if_then_else(((((32 <= i2) && (i2 < 128)) && (32 <= i3)) && (i3 < 128)), Input[(((((i1*96) + i2)*96) + i3) + -3104)], 0.000000f)
}
}
}
}
// b.i.j.fused.fused ranges over the 96*96 = 9216 output pixels;
// c.outer*73728 = c.outer*8*9216 selects the 8-channel group. The
// PaddedInput ramp base expands to c.outer*204800 + di*5120 + i*160 +
// dj*32 + j with i = fused/96, j = fused % 96 (strided taps as in the
// scalar schedule, 8 channel planes per c.outer step).
produce DepthwiseConv2d {
for (b.i.j.fused.fused, 0, 9216) {
for (c.outer, 0, 32) {
DepthwiseConv2d[ramp(((c.outer*73728) + b.i.j.fused.fused), 9216, 8)] = x8(0.000000f)
for (di, 0, 3) {
for (dj, 0, 3) {
DepthwiseConv2d[ramp(((c.outer*73728) + b.i.j.fused.fused), 9216, 8)] = (DepthwiseConv2d[ramp(((c.outer*73728) + b.i.j.fused.fused), 9216, 8)] + (PaddedInput[ramp(((((((((c.outer*40) + di)*32) + (b.i.j.fused.fused/96))*5) + dj)*32) + (b.i.j.fused.fused % 96)), 25600, 8)]*Filter[ramp(((((c.outer*24) + di)*3) + dj), 9, 8)]))
}
}
}
}
}
; ModuleID = 'default_function'
source_filename = "default_function"
; 64-bit little-endian RISC-V, bare-metal ELF target.
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64-unknown-elf"
; %0 mirrors TVM's DLTensor layout: {data i8*, ctx %1, ndim i32, dtype %2,
; shape i64*, strides i64*, byte_offset i64}. Layout inferred from the field
; indices accessed in @default_function below -- confirm against DLTensor.
%0 = type { i8*, %1, i32, %2, i64*, i64*, i64 }
; %1: device context {device_type, device_id} (see the .str.5/.str.20 asserts).
%1 = type { i32, i32 }
; %2: dtype {code, bits, lanes}; float32 is {2, 32, 1} (see .str.7/.str.14).
%2 = type { i8, i8, i16 }
; TVM runtime hooks; null function-pointer globals patched in at load time.
@__TVMAPISetLastError = linkonce dllexport local_unnamed_addr global void (i8*)* null, align 8
; Failure messages for the argument checks performed by @default_function.
@.str = private constant [69 x i8] c"Assert fail: (num_args == 2), default_function: num_args should be 2\00", align 1
@.str.1 = private constant [202 x i8] c"Assert fail: ((((1 == int32(arg0.strides[3])) && (96 == int32(arg0.strides[2]))) && (9216 == int32(arg0.strides[1]))) && (2359296 == int32(arg0.strides[0]))), arg0.strides: expected to be compact array\00", align 1
@.str.2 = private constant [192 x i8] c"Assert fail: ((((1 == int32(arg1.strides[3])) && (3 == int32(arg1.strides[2]))) && (9 == int32(arg1.strides[1]))) && (9 == int32(arg1.strides[0]))), arg1.strides: expected to be compact array\00", align 1
@.str.3 = private constant [144 x i8] c"Assert fail: ((((arg0.code == 3) || (arg0.code == 13)) || (arg0.code == 7)) || (arg0.code == 4)), default_function: Expect arg[0] to be pointer\00", align 1
@.str.4 = private constant [144 x i8] c"Assert fail: ((((arg1.code == 3) || (arg1.code == 13)) || (arg1.code == 7)) || (arg1.code == 4)), default_function: Expect arg[1] to be pointer\00", align 1
@.str.5 = private constant [55 x i8] c"Assert fail: (dev_type == 1), device_type need to be 1\00", align 1
@.str.6 = private constant [81 x i8] c"Assert fail: (4 == tvm_struct_get(arg0, 0, 4)), arg0.ndim is expected to equal 4\00", align 1
@.str.7 = private constant [186 x i8] c"Assert fail: (((tvm_struct_get(arg0, 0, 5) == (uint8)2) && (tvm_struct_get(arg0, 0, 6) == (uint8)32)) && (tvm_struct_get(arg0, 0, 7) == (uint16)1)), arg0.dtype is expected to be float32\00", align 1
@.str.8 = private constant [95 x i8] c"Assert fail: (int32(arg0.shape[0]) == 1), Argument arg0.shape[0] has an unsatisfied constraint\00", align 1
@.str.9 = private constant [97 x i8] c"Assert fail: (int32(arg0.shape[1]) == 256), Argument arg0.shape[1] has an unsatisfied constraint\00", align 1
@.str.10 = private constant [96 x i8] c"Assert fail: (int32(arg0.shape[2]) == 96), Argument arg0.shape[2] has an unsatisfied constraint\00", align 1
@.str.11 = private constant [96 x i8] c"Assert fail: (int32(arg0.shape[3]) == 96), Argument arg0.shape[3] has an unsatisfied constraint\00", align 1
@.str.12 = private constant [112 x i8] c"Assert fail: (tvm_struct_get(arg0, 0, 8) == (uint64)0), Argument arg0.byte_offset has an unsatisfied constraint\00", align 1
@.str.13 = private constant [81 x i8] c"Assert fail: (4 == tvm_struct_get(arg1, 0, 4)), arg1.ndim is expected to equal 4\00", align 1
@.str.14 = private constant [186 x i8] c"Assert fail: (((tvm_struct_get(arg1, 0, 5) == (uint8)2) && (tvm_struct_get(arg1, 0, 6) == (uint8)32)) && (tvm_struct_get(arg1, 0, 7) == (uint16)1)), arg1.dtype is expected to be float32\00", align 1
@.str.15 = private constant [97 x i8] c"Assert fail: (int32(arg1.shape[0]) == 256), Argument arg1.shape[0] has an unsatisfied constraint\00", align 1
@.str.16 = private constant [95 x i8] c"Assert fail: (int32(arg1.shape[1]) == 1), Argument arg1.shape[1] has an unsatisfied constraint\00", align 1
@.str.17 = private constant [95 x i8] c"Assert fail: (int32(arg1.shape[2]) == 3), Argument arg1.shape[2] has an unsatisfied constraint\00", align 1
@.str.18 = private constant [95 x i8] c"Assert fail: (int32(arg1.shape[3]) == 3), Argument arg1.shape[3] has an unsatisfied constraint\00", align 1
@.str.19 = private constant [112 x i8] c"Assert fail: (tvm_struct_get(arg1, 0, 8) == (uint64)0), Argument arg1.byte_offset has an unsatisfied constraint\00", align 1
@.str.20 = private constant [105 x i8] c"Assert fail: (1 == tvm_struct_get(arg1, 0, 10)), Argument arg1.device_type has an unsatisfied constraint\00", align 1
@.str.21 = private constant [107 x i8] c"Assert fail: (dev_id == tvm_struct_get(arg1, 0, 9)), Argument arg1.device_id has an unsatisfied constraint\00", align 1
; Workspace allocator/deallocator hooks used by the compute kernel.
@__TVMBackendAllocWorkspace = linkonce dllexport local_unnamed_addr global i8* (i32, i32, i64, i32, i32)* null, align 8
@__TVMBackendFreeWorkspace = linkonce dllexport local_unnamed_addr global i32 (i32, i32, i8*)* null, align 8
; Name of the module's entry function, read by the TVM runtime.
@__tvm_main__ = weak local_unnamed_addr constant [17 x i8] c"default_function\00", align 1
; TVM packed-call entry point. %0 = TVMValue array (args), %1 = type-code
; array, %2 = num_args. Validates both DLTensor arguments (shape, strides,
; dtype, ndim, byte_offset, device) and then calls the compute kernel.
; Returns 0 on success; on any failed check it stores a message via
; @__TVMAPISetLastError and returns -1.
define dllexport i32 @default_function(i8* noalias nocapture readonly, i8* noalias nocapture readonly, i32) local_unnamed_addr {
; Require exactly two arguments.
entry:
%3 = icmp eq i32 %2, 2
br i1 %3, label %assert_end, label %assert_fail, !prof !1
; num_args != 2.
assert_fail: ; preds = %entry
%4 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %4(i8* getelementptr inbounds ([69 x i8], [69 x i8]* @.str, i64 0, i64 0))
ret i32 -1
; Unpack arg0 (%6) and arg1 (%11) DLTensor pointers plus their type codes
; (%8, %14); load arg0's data (%16), shape (%18) and strides (%20) fields.
assert_end: ; preds = %entry
%5 = bitcast i8* %0 to %0**
%6 = load %0*, %0** %5, align 8
%7 = bitcast i8* %1 to i32*
%8 = load i32, i32* %7, align 4, !tbaa !5
%9 = getelementptr inbounds i8, i8* %0, i64 8
%10 = bitcast i8* %9 to %0**
%11 = load %0*, %0** %10, align 8
%12 = getelementptr inbounds i8, i8* %1, i64 4
%13 = bitcast i8* %12 to i32*
%14 = load i32, i32* %13, align 4, !tbaa !19
%15 = getelementptr inbounds %0, %0* %6, i64 0, i32 0
%16 = load i8*, i8** %15, align 8
%17 = getelementptr inbounds %0, %0* %6, i64 0, i32 4
%18 = load i64*, i64** %17, align 8
%19 = getelementptr inbounds %0, %0* %6, i64 0, i32 5
%20 = load i64*, i64** %19, align 8
%21 = icmp eq i64* %20, null
br i1 %21, label %if_end, label %if_then, !prof !21
; arg0.strides is non-null: require compact {2359296, 9216, 96, 1}
; (= 1x256x96x96 NCHW, contiguous).
if_then: ; preds = %assert_end
%22 = load i64, i64* %20, align 8, !tbaa !22
%23 = trunc i64 %22 to i32
%24 = icmp eq i32 %23, 2359296
%25 = getelementptr inbounds i64, i64* %20, i64 1
%26 = load i64, i64* %25, align 8, !tbaa !36
%27 = trunc i64 %26 to i32
%28 = icmp eq i32 %27, 9216
%29 = getelementptr inbounds i64, i64* %20, i64 2
%30 = load i64, i64* %29, align 8, !tbaa !38
%31 = trunc i64 %30 to i32
%32 = icmp eq i32 %31, 96
%33 = getelementptr inbounds i64, i64* %20, i64 3
%34 = load i64, i64* %33, align 8, !tbaa !41
%35 = trunc i64 %34 to i32
%36 = icmp eq i32 %35, 1
%37 = and i1 %32, %36
%38 = and i1 %28, %37
%39 = and i1 %24, %38
br i1 %39, label %if_end, label %assert_fail1, !prof !1
; Load arg0's ctx {device_type %41, device_id %43} and arg1's data (%45),
; shape (%47) and strides (%49) fields.
if_end: ; preds = %assert_end, %if_then
%40 = getelementptr inbounds %0, %0* %6, i64 0, i32 1, i32 0
%41 = load i32, i32* %40, align 4
%42 = getelementptr inbounds %0, %0* %6, i64 0, i32 1, i32 1
%43 = load i32, i32* %42, align 4
%44 = getelementptr inbounds %0, %0* %11, i64 0, i32 0
%45 = load i8*, i8** %44, align 8
%46 = getelementptr inbounds %0, %0* %11, i64 0, i32 4
%47 = load i64*, i64** %46, align 8
%48 = getelementptr inbounds %0, %0* %11, i64 0, i32 5
%49 = load i64*, i64** %48, align 8
%50 = icmp eq i64* %49, null
br i1 %50, label %if_end4, label %if_then3, !prof !21
; arg0.strides not compact (.str.1).
assert_fail1: ; preds = %if_then
%51 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %51(i8* getelementptr inbounds ([202 x i8], [202 x i8]* @.str.1, i64 0, i64 0))
ret i32 -1
; arg1.strides non-null: require compact {9, 9, 3, 1} (= 256x1x3x3 filter).
if_then3: ; preds = %if_end
%52 = load i64, i64* %49, align 8, !tbaa !43
%53 = trunc i64 %52 to i32
%54 = icmp eq i32 %53, 9
%55 = getelementptr inbounds i64, i64* %49, i64 1
%56 = load i64, i64* %55, align 8, !tbaa !57
%57 = trunc i64 %56 to i32
%58 = icmp eq i32 %57, 9
%59 = getelementptr inbounds i64, i64* %49, i64 2
%60 = load i64, i64* %59, align 8, !tbaa !59
%61 = trunc i64 %60 to i32
%62 = icmp eq i32 %61, 3
%63 = getelementptr inbounds i64, i64* %49, i64 3
%64 = load i64, i64* %63, align 8, !tbaa !62
%65 = trunc i64 %64 to i32
%66 = icmp eq i32 %65, 1
%67 = and i1 %62, %66
%68 = and i1 %58, %67
%69 = and i1 %54, %68
br i1 %69, label %if_end4, label %assert_fail5, !prof !1
; arg0's type code must be a pointer-like kind (3, 4, 7 or 13).
if_end4: ; preds = %if_end, %if_then3
switch i32 %8, label %assert_fail7 [
i32 13, label %assert_end8
i32 7, label %assert_end8
i32 4, label %assert_end8
i32 3, label %assert_end8
]
; arg1.strides not compact (.str.2).
assert_fail5: ; preds = %if_then3
%70 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %70(i8* getelementptr inbounds ([192 x i8], [192 x i8]* @.str.2, i64 0, i64 0))
ret i32 -1
; arg0 type code not a pointer (.str.3).
assert_fail7: ; preds = %if_end4
%71 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %71(i8* getelementptr inbounds ([144 x i8], [144 x i8]* @.str.3, i64 0, i64 0))
ret i32 -1
; Same pointer-kind check for arg1's type code.
assert_end8: ; preds = %if_end4, %if_end4, %if_end4, %if_end4
switch i32 %14, label %assert_fail9 [
i32 13, label %assert_end10
i32 7, label %assert_end10
i32 4, label %assert_end10
i32 3, label %assert_end10
]
; arg1 type code not a pointer (.str.4).
assert_fail9: ; preds = %assert_end8
%72 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %72(i8* getelementptr inbounds ([144 x i8], [144 x i8]* @.str.4, i64 0, i64 0))
ret i32 -1
; device_type must be 1 (CPU).
assert_end10: ; preds = %assert_end8, %assert_end8, %assert_end8, %assert_end8
%73 = icmp eq i32 %41, 1
br i1 %73, label %assert_end12, label %assert_fail11, !prof !1
assert_fail11: ; preds = %assert_end10
%74 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %74(i8* getelementptr inbounds ([55 x i8], [55 x i8]* @.str.5, i64 0, i64 0))
ret i32 -1
; arg0.ndim == 4.
assert_end12: ; preds = %assert_end10
%75 = getelementptr inbounds %0, %0* %6, i64 0, i32 2
%76 = load i32, i32* %75, align 4
%77 = icmp eq i32 %76, 4
br i1 %77, label %assert_end14, label %assert_fail13, !prof !1
assert_fail13: ; preds = %assert_end12
%78 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %78(i8* getelementptr inbounds ([81 x i8], [81 x i8]* @.str.6, i64 0, i64 0))
ret i32 -1
; arg0.dtype == float32, i.e. {code 2, bits 32, lanes 1}.
assert_end14: ; preds = %assert_end12
%79 = getelementptr inbounds %0, %0* %6, i64 0, i32 3, i32 2
%80 = load i16, i16* %79, align 2
%81 = icmp eq i16 %80, 1
%82 = getelementptr inbounds %0, %0* %6, i64 0, i32 3, i32 1
%83 = load i8, i8* %82, align 1
%84 = icmp eq i8 %83, 32
%85 = getelementptr inbounds %0, %0* %6, i64 0, i32 3, i32 0
%86 = load i8, i8* %85, align 1
%87 = icmp eq i8 %86, 2
%88 = and i1 %84, %87
%89 = and i1 %81, %88
br i1 %89, label %assert_end16, label %assert_fail15, !prof !1
assert_fail15: ; preds = %assert_end14
%90 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %90(i8* getelementptr inbounds ([186 x i8], [186 x i8]* @.str.7, i64 0, i64 0))
ret i32 -1
; arg0.shape must be {1, 256, 96, 96}, checked one dimension at a time.
assert_end16: ; preds = %assert_end14
%91 = load i64, i64* %18, align 8, !tbaa !64
%92 = trunc i64 %91 to i32
%93 = icmp eq i32 %92, 1
br i1 %93, label %assert_end18, label %assert_fail17, !prof !1
assert_fail17: ; preds = %assert_end16
%94 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %94(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.8, i64 0, i64 0))
ret i32 -1
assert_end18: ; preds = %assert_end16
%95 = getelementptr inbounds i64, i64* %18, i64 1
%96 = load i64, i64* %95, align 8, !tbaa !78
%97 = trunc i64 %96 to i32
%98 = icmp eq i32 %97, 256
br i1 %98, label %assert_end20, label %assert_fail19, !prof !1
assert_fail19: ; preds = %assert_end18
%99 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %99(i8* getelementptr inbounds ([97 x i8], [97 x i8]* @.str.9, i64 0, i64 0))
ret i32 -1
assert_end20: ; preds = %assert_end18
%100 = getelementptr inbounds i64, i64* %18, i64 2
%101 = load i64, i64* %100, align 8, !tbaa !80
%102 = trunc i64 %101 to i32
%103 = icmp eq i32 %102, 96
br i1 %103, label %assert_end22, label %assert_fail21, !prof !1
assert_fail21: ; preds = %assert_end20
%104 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %104(i8* getelementptr inbounds ([96 x i8], [96 x i8]* @.str.10, i64 0, i64 0))
ret i32 -1
assert_end22: ; preds = %assert_end20
%105 = getelementptr inbounds i64, i64* %18, i64 3
%106 = load i64, i64* %105, align 8, !tbaa !83
%107 = trunc i64 %106 to i32
%108 = icmp eq i32 %107, 96
br i1 %108, label %assert_end24, label %assert_fail23, !prof !1
assert_fail23: ; preds = %assert_end22
%109 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %109(i8* getelementptr inbounds ([96 x i8], [96 x i8]* @.str.11, i64 0, i64 0))
ret i32 -1
; arg0.byte_offset == 0.
assert_end24: ; preds = %assert_end22
%110 = getelementptr inbounds %0, %0* %6, i64 0, i32 6
%111 = load i64, i64* %110, align 8
%112 = icmp eq i64 %111, 0
br i1 %112, label %assert_end26, label %assert_fail25, !prof !1
assert_fail25: ; preds = %assert_end24
%113 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %113(i8* getelementptr inbounds ([112 x i8], [112 x i8]* @.str.12, i64 0, i64 0))
ret i32 -1
; Same battery of checks for arg1: ndim == 4 ...
assert_end26: ; preds = %assert_end24
%114 = getelementptr inbounds %0, %0* %11, i64 0, i32 2
%115 = load i32, i32* %114, align 4
%116 = icmp eq i32 %115, 4
br i1 %116, label %assert_end28, label %assert_fail27, !prof !1
assert_fail27: ; preds = %assert_end26
%117 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %117(i8* getelementptr inbounds ([81 x i8], [81 x i8]* @.str.13, i64 0, i64 0))
ret i32 -1
; ... dtype == float32 ...
assert_end28: ; preds = %assert_end26
%118 = getelementptr inbounds %0, %0* %11, i64 0, i32 3, i32 2
%119 = load i16, i16* %118, align 2
%120 = icmp eq i16 %119, 1
%121 = getelementptr inbounds %0, %0* %11, i64 0, i32 3, i32 1
%122 = load i8, i8* %121, align 1
%123 = icmp eq i8 %122, 32
%124 = getelementptr inbounds %0, %0* %11, i64 0, i32 3, i32 0
%125 = load i8, i8* %124, align 1
%126 = icmp eq i8 %125, 2
%127 = and i1 %123, %126
%128 = and i1 %120, %127
br i1 %128, label %assert_end30, label %assert_fail29, !prof !1
assert_fail29: ; preds = %assert_end28
%129 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %129(i8* getelementptr inbounds ([186 x i8], [186 x i8]* @.str.14, i64 0, i64 0))
ret i32 -1
; ... shape == {256, 1, 3, 3} ...
assert_end30: ; preds = %assert_end28
%130 = load i64, i64* %47, align 8, !tbaa !85
%131 = trunc i64 %130 to i32
%132 = icmp eq i32 %131, 256
br i1 %132, label %assert_end32, label %assert_fail31, !prof !1
assert_fail31: ; preds = %assert_end30
%133 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %133(i8* getelementptr inbounds ([97 x i8], [97 x i8]* @.str.15, i64 0, i64 0))
ret i32 -1
assert_end32: ; preds = %assert_end30
%134 = getelementptr inbounds i64, i64* %47, i64 1
%135 = load i64, i64* %134, align 8, !tbaa !99
%136 = trunc i64 %135 to i32
%137 = icmp eq i32 %136, 1
br i1 %137, label %assert_end34, label %assert_fail33, !prof !1
assert_fail33: ; preds = %assert_end32
%138 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %138(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.16, i64 0, i64 0))
ret i32 -1
assert_end34: ; preds = %assert_end32
%139 = getelementptr inbounds i64, i64* %47, i64 2
%140 = load i64, i64* %139, align 8, !tbaa !101
%141 = trunc i64 %140 to i32
%142 = icmp eq i32 %141, 3
br i1 %142, label %assert_end36, label %assert_fail35, !prof !1
assert_fail35: ; preds = %assert_end34
%143 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %143(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.17, i64 0, i64 0))
ret i32 -1
assert_end36: ; preds = %assert_end34
%144 = getelementptr inbounds i64, i64* %47, i64 3
%145 = load i64, i64* %144, align 8, !tbaa !104
%146 = trunc i64 %145 to i32
%147 = icmp eq i32 %146, 3
br i1 %147, label %assert_end38, label %assert_fail37, !prof !1
assert_fail37: ; preds = %assert_end36
%148 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %148(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.18, i64 0, i64 0))
ret i32 -1
; ... byte_offset == 0 ...
assert_end38: ; preds = %assert_end36
%149 = getelementptr inbounds %0, %0* %11, i64 0, i32 6
%150 = load i64, i64* %149, align 8
%151 = icmp eq i64 %150, 0
br i1 %151, label %assert_end40, label %assert_fail39, !prof !1
assert_fail39: ; preds = %assert_end38
%152 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %152(i8* getelementptr inbounds ([112 x i8], [112 x i8]* @.str.19, i64 0, i64 0))
ret i32 -1
; ... arg1.device_type == 1 ...
assert_end40: ; preds = %assert_end38
%153 = getelementptr inbounds %0, %0* %11, i64 0, i32 1, i32 0
%154 = load i32, i32* %153, align 4
%155 = icmp eq i32 %154, 1
br i1 %155, label %assert_end42, label %assert_fail41, !prof !1
assert_fail41: ; preds = %assert_end40
%156 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %156(i8* getelementptr inbounds ([105 x i8], [105 x i8]* @.str.20, i64 0, i64 0))
ret i32 -1
; ... and arg1.device_id == arg0.device_id.
assert_end42: ; preds = %assert_end40
%157 = getelementptr inbounds %0, %0* %11, i64 0, i32 1, i32 1
%158 = load i32, i32* %157, align 4
%159 = icmp eq i32 %43, %158
br i1 %159, label %assert_end44, label %assert_fail43, !prof !1
assert_fail43: ; preds = %assert_end42
%160 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
tail call void %160(i8* getelementptr inbounds ([107 x i8], [107 x i8]* @.str.21, i64 0, i64 0))
ret i32 -1
; All checks passed: run the kernel on (input data, filter data, device_id).
assert_end44: ; preds = %assert_end42
tail call fastcc void @default_function_compute_(i8* %16, i8* %45, i32 %43)
ret i32 0
}
; Function Attrs: noinline
; Compute kernel for the vectorized schedule above. %0 = Input data,
; %1 = Filter data, %2 = device_id. Two phases: (1) scalar zero-padding of
; Input into a 256 x 160 x 160 workspace; (2) the depthwise conv, with the
; 3x3 tap loops fully unrolled into nine strided-vector load/fmuladd/store
; rounds using the (non-upstream) llvm.riscv vlse/vsse intrinsics.
define private fastcc void @default_function_compute_(i8* noalias nocapture readonly, i8* noalias, i32) unnamed_addr #0 {
; Allocate the two workspaces: 26214400 B = 6553600 f32 (PaddedInput, %4)
; and 9437184 B = 2359296 f32 (DepthwiseConv2d output, %6).
entry:
%3 = load i8* (i32, i32, i64, i32, i32)*, i8* (i32, i32, i64, i32, i32)** @__TVMBackendAllocWorkspace, align 8, !tbaa !2
%4 = tail call i8* %3(i32 1, i32 %2, i64 26214400, i32 2, i32 32)
%5 = load i8* (i32, i32, i64, i32, i32)*, i8* (i32, i32, i64, i32, i32)** @__TVMBackendAllocWorkspace, align 8, !tbaa !2
%6 = tail call i8* %5(i32 1, i32 %2, i64 9437184, i32 2, i32 32)
%7 = bitcast i8* %0 to float*
%8 = bitcast i8* %4 to float*
br label %for_begin3.preheader
; Padding phase, channel loop (indvar = 0..255): %9 is the channel's byte
; offset in the padded buffer (102400 = 160*160*4), %10/%11 scale the
; channel index for padded (160-row) and unpadded (96-row) addressing.
for_begin3.preheader: ; preds = %for_end5, %entry
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for_end5 ]
%9 = mul nuw nsw i64 %indvar, 102400
%10 = mul nuw nsw i64 %indvar, 160
%11 = mul nuw nsw i64 %indvar, 96
br label %for_begin6.preheader
; Padding done; set up float views of the output workspace (%12) and the
; filter (%13) for the conv phase.
for_begin11.preheader: ; preds = %for_end5
%12 = bitcast i8* %6 to float*
%13 = bitcast i8* %1 to float*
br label %for_begin14.preheader
; Row loop (indvar9 = 0..159). %18 is the interior-row test
; 32 <= row < 128, folded to the unsigned compare (row - 32) <u 96.
; %21 pre-computes the Input row base minus 3104 (= 32*96 + 32).
for_begin6.preheader: ; preds = %for_end8, %for_begin3.preheader
%indvar9 = phi i64 [ 0, %for_begin3.preheader ], [ %indvar.next10, %for_end8 ]
%14 = add nuw nsw i64 %indvar9, %10
%15 = mul nuw nsw i64 %14, 160
%16 = trunc i64 %indvar9 to i32
%17 = add i32 %16, -32
%18 = icmp ult i32 %17, 96
%19 = add nuw nsw i64 %indvar9, %11
%20 = mul nuw nsw i64 %19, 96
%21 = add nsw i64 %20, -3104
br i1 %18, label %for_body7.us, label %for_body7.preheader
; Row entirely in the padding band: zero all 160 floats (640 B) at once.
for_body7.preheader: ; preds = %for_begin6.preheader
%22 = mul nuw nsw i64 %indvar9, 640
%23 = add nuw nsw i64 %9, %22
%scevgep = getelementptr i8, i8* %4, i64 %23
call void @llvm.memset.p0i8.i64(i8* align 4 %scevgep, i8 0, i64 640, i1 false)
br label %for_end8
; Interior row: per-column test (col - 32) <u 96 selects copy vs zero.
for_body7.us: ; preds = %for_begin6.preheader, %if_end10.us
%indvars.iv11 = phi i64 [ %indvars.iv.next12, %if_end10.us ], [ 0, %for_begin6.preheader ]
%24 = add nuw nsw i64 %indvars.iv11, %15
%25 = trunc i64 %indvars.iv11 to i32
%26 = add i32 %25, -32
%27 = icmp ult i32 %26, 96
br i1 %27, label %if_then9.us, label %if_end10.us
; Interior column: read the Input element.
if_then9.us: ; preds = %for_body7.us
%28 = add nsw i64 %21, %indvars.iv11
%29 = getelementptr inbounds float, float* %7, i64 %28
%30 = load float, float* %29, align 4, !tbaa !106
br label %if_end10.us
; Store the Input value or 0.0 into the padded buffer.
if_end10.us: ; preds = %if_then9.us, %for_body7.us
%31 = phi float [ %30, %if_then9.us ], [ 0.000000e+00, %for_body7.us ]
%32 = getelementptr inbounds float, float* %8, i64 %24
store float %31, float* %32, align 4, !tbaa !109
%indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
%exitcond14 = icmp eq i64 %indvars.iv.next12, 160
br i1 %exitcond14, label %for_end8, label %for_body7.us, !prof !21
; Channel-loop latch: after 256 channels, move on to the conv phase.
for_end5: ; preds = %for_end8
%indvar.next = add nuw nsw i64 %indvar, 1
%exitcond17 = icmp eq i64 %indvar.next, 256
br i1 %exitcond17, label %for_begin11.preheader, label %for_begin3.preheader, !prof !21
; Row-loop latch (160 rows per channel).
for_end8: ; preds = %if_end10.us, %for_body7.preheader
%indvar.next10 = add nuw nsw i64 %indvar9, 1
%exitcond16 = icmp eq i64 %indvar.next10, 160
br i1 %exitcond16, label %for_end5, label %for_begin6.preheader, !prof !21
; Conv phase, outer loop over the 9216 fused output pixels.
; %34 = pixel / 96 (output row i), %.decomposed = pixel % 96 (column j).
for_begin14.preheader: ; preds = %for_end16, %for_begin11.preheader
%indvars.iv3 = phi i64 [ 0, %for_begin11.preheader ], [ %indvars.iv.next4, %for_end16 ]
%33 = trunc i64 %indvars.iv3 to i32
%34 = udiv i32 %33, 96
%35 = mul i32 %34, 96
%.decomposed = sub i32 %33, %35
br label %for_body15
; All pixels done: free both workspaces (reverse allocation order) and return.
for_end13: ; preds = %for_end16
%36 = load i32 (i32, i32, i8*)*, i32 (i32, i32, i8*)** @__TVMBackendFreeWorkspace, align 8, !tbaa !2
%37 = tail call i32 %36(i32 1, i32 %2, i8* %6)
%38 = load i32 (i32, i32, i8*)*, i32 (i32, i32, i8*)** @__TVMBackendFreeWorkspace, align 8, !tbaa !2
%39 = tail call i32 %38(i32 1, i32 %2, i8* %4)
ret void
; c.outer loop (indvars.iv = 0..31, %40 its i32 mirror). %44 is the 8-lane
; output vector location: base (c.outer*73728 + pixel) floats, lane stride
; 36864 B = 9216 floats. Each vlse/vsse is preceded by vsetvl(8, 8)
; (presumably requesting VL=8; confirm against the backend's intrinsic
; definition -- these `v.any` intrinsics are not upstream LLVM). The nine
; unrolled taps load PaddedInput with lane stride 102400 B = 25600 floats
; (one channel plane) and Filter with lane stride 36 B = 9 floats (one 3x3
; filter), matching the ramp(..., 25600, 8) / ramp(..., 9, 8) TVM IR above.
for_body15: ; preds = %for_body15, %for_begin14.preheader
%indvars.iv = phi i64 [ 0, %for_begin14.preheader ], [ %indvars.iv.next, %for_body15 ]
%40 = phi i32 [ 0, %for_begin14.preheader ], [ %207, %for_body15 ]
%41 = mul nuw nsw i64 %indvars.iv, 73728
%42 = add nuw nsw i64 %41, %indvars.iv3
%43 = getelementptr inbounds float, float* %12, i64 %42
%44 = bitcast float* %43 to <8 x float>*
%45 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> zeroinitializer, <8 x float>* %44, i64 36864)
; Tap (di=0, dj=0): input element index (c*1280 + i)*160 + j
; = c*204800 + i*160 + j; dj adds 32 per step, di adds 5120 (= 32*160),
; the strided-tap layout noted in the TVM IR.
%46 = mul nuw nsw i64 %indvars.iv, 24
%47 = mul i32 %40, 1280
%48 = add nuw nsw i32 %47, %34
%49 = mul nuw i64 %indvars.iv, 72
%50 = mul i32 %48, 160
%51 = add nsw i32 %50, %.decomposed
%52 = sext i32 %51 to i64
%53 = getelementptr inbounds float, float* %8, i64 %52
%54 = bitcast float* %53 to <8 x float>*
%55 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%56 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %54, i64 102400)
%57 = getelementptr inbounds float, float* %13, i64 %49
%58 = bitcast float* %57 to <8 x float>*
%59 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%60 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %58, i64 36)
%61 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%62 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864)
%63 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %56, <8 x float> %60, <8 x float> %62)
%64 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %63, <8 x float>* %44, i64 36864)
; Tap (di=0, dj=1): input +32 elements, filter +1.
%65 = mul i32 %48, 160
%66 = add i32 %65, 32
%67 = add nsw i32 %66, %.decomposed
%68 = sext i32 %67 to i64
%69 = getelementptr inbounds float, float* %8, i64 %68
%70 = bitcast float* %69 to <8 x float>*
%71 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%72 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %70, i64 102400)
%73 = or i64 %49, 1
%74 = getelementptr inbounds float, float* %13, i64 %73
%75 = bitcast float* %74 to <8 x float>*
%76 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%77 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %75, i64 36)
%78 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%79 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864)
%80 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %72, <8 x float> %77, <8 x float> %79)
%81 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %80, <8 x float>* %44, i64 36864)
; Tap (di=0, dj=2): input +64 elements, filter +2.
%82 = mul i32 %48, 160
%83 = add i32 %82, 64
%84 = add nsw i32 %83, %.decomposed
%85 = sext i32 %84 to i64
%86 = getelementptr inbounds float, float* %8, i64 %85
%87 = bitcast float* %86 to <8 x float>*
%88 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%89 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %87, i64 102400)
%90 = or i64 %49, 2
%91 = getelementptr inbounds float, float* %13, i64 %90
%92 = bitcast float* %91 to <8 x float>*
%93 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%94 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %92, i64 36)
%95 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%96 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864)
%97 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %89, <8 x float> %94, <8 x float> %96)
%98 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %97, <8 x float>* %44, i64 36864)
; Taps (di=1, dj=0..2): %100 = c*1280 | 32 shifts the input row block by
; 5120 elements; %103 selects the filter's second row.
%99 = mul i32 %40, 1280
%100 = or i32 %99, 32
%101 = add nuw nsw i32 %100, %34
%102 = or i64 %46, 1
%103 = mul nuw nsw i64 %102, 3
%104 = mul i32 %101, 160
%105 = add nsw i32 %104, %.decomposed
%106 = sext i32 %105 to i64
%107 = getelementptr inbounds float, float* %8, i64 %106
%108 = bitcast float* %107 to <8 x float>*
%109 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%110 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %108, i64 102400)
%111 = getelementptr inbounds float, float* %13, i64 %103
%112 = bitcast float* %111 to <8 x float>*
%113 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%114 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %112, i64 36)
%115 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%116 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864)
%117 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %110, <8 x float> %114, <8 x float> %116)
%118 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %117, <8 x float>* %44, i64 36864)
%119 = mul i32 %101, 160
%120 = add i32 %119, 32
%121 = add nsw i32 %120, %.decomposed
%122 = sext i32 %121 to i64
%123 = getelementptr inbounds float, float* %8, i64 %122
%124 = bitcast float* %123 to <8 x float>*
%125 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%126 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %124, i64 102400)
%127 = add nuw nsw i64 %103, 1
%128 = getelementptr inbounds float, float* %13, i64 %127
%129 = bitcast float* %128 to <8 x float>*
%130 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%131 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %129, i64 36)
%132 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%133 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864)
%134 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %126, <8 x float> %131, <8 x float> %133)
%135 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %134, <8 x float>* %44, i64 36864)
%136 = mul i32 %101, 160
%137 = add i32 %136, 64
%138 = add nsw i32 %137, %.decomposed
%139 = sext i32 %138 to i64
%140 = getelementptr inbounds float, float* %8, i64 %139
%141 = bitcast float* %140 to <8 x float>*
%142 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%143 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %141, i64 102400)
%144 = add nuw nsw i64 %103, 2
%145 = getelementptr inbounds float, float* %13, i64 %144
%146 = bitcast float* %145 to <8 x float>*
%147 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%148 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %146, i64 36)
%149 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%150 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864)
%151 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %143, <8 x float> %148, <8 x float> %150)
%152 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %151, <8 x float>* %44, i64 36864)
; Taps (di=2, dj=0..2): input row block +64 (| 64), filter third row.
%153 = mul i32 %40, 1280
%154 = or i32 %153, 64
%155 = add nuw nsw i32 %154, %34
%156 = or i64 %46, 2
%157 = mul nuw nsw i64 %156, 3
%158 = mul i32 %155, 160
%159 = add nsw i32 %158, %.decomposed
%160 = sext i32 %159 to i64
%161 = getelementptr inbounds float, float* %8, i64 %160
%162 = bitcast float* %161 to <8 x float>*
%163 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%164 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %162, i64 102400)
%165 = getelementptr inbounds float, float* %13, i64 %157
%166 = bitcast float* %165 to <8 x float>*
%167 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%168 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %166, i64 36)
%169 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%170 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864)
%171 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %164, <8 x float> %168, <8 x float> %170)
%172 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %171, <8 x float>* %44, i64 36864)
%173 = mul i32 %155, 160
%174 = add i32 %173, 32
%175 = add nsw i32 %174, %.decomposed
%176 = sext i32 %175 to i64
%177 = getelementptr inbounds float, float* %8, i64 %176
%178 = bitcast float* %177 to <8 x float>*
%179 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%180 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %178, i64 102400)
%181 = or i64 %157, 1
%182 = getelementptr inbounds float, float* %13, i64 %181
%183 = bitcast float* %182 to <8 x float>*
%184 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%185 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %183, i64 36)
%186 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%187 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864)
%188 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %180, <8 x float> %185, <8 x float> %187)
%189 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %188, <8 x float>* %44, i64 36864)
%190 = mul i32 %155, 160
%191 = add i32 %190, 64
%192 = add nsw i32 %191, %.decomposed
%193 = sext i32 %192 to i64
%194 = getelementptr inbounds float, float* %8, i64 %193
%195 = bitcast float* %194 to <8 x float>*
%196 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%197 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %195, i64 102400)
%198 = add nuw nsw i64 %157, 2
%199 = getelementptr inbounds float, float* %13, i64 %198
%200 = bitcast float* %199 to <8 x float>*
%201 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%202 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %200, i64 36)
%203 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
%204 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864)
%205 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %197, <8 x float> %202, <8 x float> %204)
%206 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8)
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %205, <8 x float>* %44, i64 36864)
; c.outer latch (32 channel groups per pixel).
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%207 = add nuw nsw i32 %40, 1
%exitcond = icmp eq i64 %indvars.iv.next, 32
br i1 %exitcond, label %for_end16, label %for_body15, !prof !21
; Pixel-loop latch (9216 fused output pixels).
for_end16: ; preds = %for_body15
%indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
%exitcond5 = icmp eq i64 %indvars.iv.next4, 9216
br i1 %exitcond5, label %for_end13, label %for_begin14.preheader, !prof !21
}
; Function Attrs: nounwind
declare i64 @llvm.riscv.vsetvl(i64, i64) #1
; Function Attrs: nounwind
declare void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float>, <8 x float>*, i64) #1
; Function Attrs: nounwind
declare <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>*, i64) #1
; Function Attrs: nounwind readnone speculatable
declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) #2
; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3
attributes #0 = { noinline }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { argmemonly nounwind }
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"tvm_target", !"llvm"}
!1 = !{!"branch_weights", i32 1048576, i32 1}
!2 = !{!3, !3, i64 0}
!3 = !{!"ctx_ptr", !4, i64 0}
!4 = !{!"tvm-tbaa"}
!5 = !{!6, !6, i64 0}
!6 = !{!"0x11c4750.w1.b0", !7, i64 0}
!7 = !{!"0x11c4750.w2.b0", !8, i64 0}
!8 = !{!"0x11c4750.w4.b0", !9, i64 0}
!9 = !{!"0x11c4750.w8.b0", !10, i64 0}
!10 = !{!"0x11c4750.w16.b0", !11, i64 0}
!11 = !{!"0x11c4750.w32.b0", !12, i64 0}
!12 = !{!"0x11c4750.w64.b0", !13, i64 0}
!13 = !{!"0x11c4750.w128.b0", !14, i64 0}
!14 = !{!"0x11c4750.w256.b0", !15, i64 0}
!15 = !{!"0x11c4750.w512.b0", !16, i64 0}
!16 = !{!"0x11c4750.w1024.b0", !17, i64 0}
!17 = !{!"int32", !18, i64 0}
!18 = !{!"0x11c4750", !4, i64 0}
!19 = !{!20, !20, i64 0}
!20 = !{!"0x11c4750.w1.b1", !7, i64 0}
!21 = !{!"branch_weights", i32 1, i32 1048576}
!22 = !{!23, !23, i64 0}
!23 = !{!"0x11cf4d0.w1.b0", !24, i64 0}
!24 = !{!"0x11cf4d0.w2.b0", !25, i64 0}
!25 = !{!"0x11cf4d0.w4.b0", !26, i64 0}
!26 = !{!"0x11cf4d0.w8.b0", !27, i64 0}
!27 = !{!"0x11cf4d0.w16.b0", !28, i64 0}
!28 = !{!"0x11cf4d0.w32.b0", !29, i64 0}
!29 = !{!"0x11cf4d0.w64.b0", !30, i64 0}
!30 = !{!"0x11cf4d0.w128.b0", !31, i64 0}
!31 = !{!"0x11cf4d0.w256.b0", !32, i64 0}
!32 = !{!"0x11cf4d0.w512.b0", !33, i64 0}
!33 = !{!"0x11cf4d0.w1024.b0", !34, i64 0}
!34 = !{!"int64", !35, i64 0}
!35 = !{!"0x11cf4d0", !4, i64 0}
!36 = !{!37, !37, i64 0}
!37 = !{!"0x11cf4d0.w1.b1", !24, i64 0}
!38 = !{!39, !39, i64 0}
!39 = !{!"0x11cf4d0.w1.b2", !40, i64 0}
!40 = !{!"0x11cf4d0.w2.b2", !25, i64 0}
!41 = !{!42, !42, i64 0}
!42 = !{!"0x11cf4d0.w1.b3", !40, i64 0}
!43 = !{!44, !44, i64 0}
!44 = !{!"0x11d24d0.w1.b0", !45, i64 0}
!45 = !{!"0x11d24d0.w2.b0", !46, i64 0}
!46 = !{!"0x11d24d0.w4.b0", !47, i64 0}
!47 = !{!"0x11d24d0.w8.b0", !48, i64 0}
!48 = !{!"0x11d24d0.w16.b0", !49, i64 0}
!49 = !{!"0x11d24d0.w32.b0", !50, i64 0}
!50 = !{!"0x11d24d0.w64.b0", !51, i64 0}
!51 = !{!"0x11d24d0.w128.b0", !52, i64 0}
!52 = !{!"0x11d24d0.w256.b0", !53, i64 0}
!53 = !{!"0x11d24d0.w512.b0", !54, i64 0}
!54 = !{!"0x11d24d0.w1024.b0", !55, i64 0}
!55 = !{!"int64", !56, i64 0}
!56 = !{!"0x11d24d0", !4, i64 0}
!57 = !{!58, !58, i64 0}
!58 = !{!"0x11d24d0.w1.b1", !45, i64 0}
!59 = !{!60, !60, i64 0}
!60 = !{!"0x11d24d0.w1.b2", !61, i64 0}
!61 = !{!"0x11d24d0.w2.b2", !46, i64 0}
!62 = !{!63, !63, i64 0}
!63 = !{!"0x11d24d0.w1.b3", !61, i64 0}
!64 = !{!65, !65, i64 0}
!65 = !{!"0x11c42b0.w1.b0", !66, i64 0}
!66 = !{!"0x11c42b0.w2.b0", !67, i64 0}
!67 = !{!"0x11c42b0.w4.b0", !68, i64 0}
!68 = !{!"0x11c42b0.w8.b0", !69, i64 0}
!69 = !{!"0x11c42b0.w16.b0", !70, i64 0}
!70 = !{!"0x11c42b0.w32.b0", !71, i64 0}
!71 = !{!"0x11c42b0.w64.b0", !72, i64 0}
!72 = !{!"0x11c42b0.w128.b0", !73, i64 0}
!73 = !{!"0x11c42b0.w256.b0", !74, i64 0}
!74 = !{!"0x11c42b0.w512.b0", !75, i64 0}
!75 = !{!"0x11c42b0.w1024.b0", !76, i64 0}
!76 = !{!"int64", !77, i64 0}
!77 = !{!"0x11c42b0", !4, i64 0}
!78 = !{!79, !79, i64 0}
!79 = !{!"0x11c42b0.w1.b1", !66, i64 0}
!80 = !{!81, !81, i64 0}
!81 = !{!"0x11c42b0.w1.b2", !82, i64 0}
!82 = !{!"0x11c42b0.w2.b2", !67, i64 0}
!83 = !{!84, !84, i64 0}
!84 = !{!"0x11c42b0.w1.b3", !82, i64 0}
!85 = !{!86, !86, i64 0}
!86 = !{!"0x11d0100.w1.b0", !87, i64 0}
!87 = !{!"0x11d0100.w2.b0", !88, i64 0}
!88 = !{!"0x11d0100.w4.b0", !89, i64 0}
!89 = !{!"0x11d0100.w8.b0", !90, i64 0}
!90 = !{!"0x11d0100.w16.b0", !91, i64 0}
!91 = !{!"0x11d0100.w32.b0", !92, i64 0}
!92 = !{!"0x11d0100.w64.b0", !93, i64 0}
!93 = !{!"0x11d0100.w128.b0", !94, i64 0}
!94 = !{!"0x11d0100.w256.b0", !95, i64 0}
!95 = !{!"0x11d0100.w512.b0", !96, i64 0}
!96 = !{!"0x11d0100.w1024.b0", !97, i64 0}
!97 = !{!"int64", !98, i64 0}
!98 = !{!"0x11d0100", !4, i64 0}
!99 = !{!100, !100, i64 0}
!100 = !{!"0x11d0100.w1.b1", !87, i64 0}
!101 = !{!102, !102, i64 0}
!102 = !{!"0x11d0100.w1.b2", !103, i64 0}
!103 = !{!"0x11d0100.w2.b2", !88, i64 0}
!104 = !{!105, !105, i64 0}
!105 = !{!"0x11d0100.w1.b3", !103, i64 0}
!106 = !{!107, !107, i64 0}
!107 = !{!"float32", !108, i64 0}
!108 = !{!"0x11c4300", !4, i64 0}
!109 = !{!110, !110, i64 0}
!110 = !{!"float32", !111, i64 0}
!111 = !{!"0x11c3540", !4, i64 0}
# Example: make TVM emit strided vector loads/stores (RISC-V vlse/vsse) by
# vectorizing a depthwise convolution along the channel axis.  In NCHW layout
# adjacent channels are a whole H*W plane apart in memory, so a channel-wise
# vector lane maps to a constant-stride memory access.
import tvm
import topi

# --- Problem configuration -------------------------------------------------
batch = 1
in_channel = 256
in_height = 96
in_width = 96
filter_channel = in_channel
channel_multiplier = 1
filter_height = 3
filter_width = 3
stride_h = 1
stride_w = 1
padding = 'SAME'  # or 'VALID'
# Dilation of the 3x3 kernel.  With SAME padding this pads each spatial side
# by `dilation`, growing the 96x96 input to a 160x160 PaddedInput buffer
# (presumably the 5th positional arg of depthwise_conv2d_nchw is dilation in
# this TVM version -- TODO confirm against the installed topi signature).
dilation = 32

# --- Placeholders ----------------------------------------------------------
Input = tvm.placeholder((batch, in_channel, in_height, in_width), name='Input')
Filter = tvm.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter')
Stride = [stride_h, stride_w]

# --- Declare the compute and default schedule ------------------------------
DepthwiseConv2d = topi.nn.depthwise_conv2d_nchw(Input, Filter, Stride, padding, dilation)
s = tvm.create_schedule(DepthwiseConv2d.op)

# Lowered IR before scheduling (plain scalar loops).
print(tvm.lower(s, [Input, Filter], simple_mode=True))

# --- Schedule: move channels innermost and vectorize them ------------------
n, cc, h, w = DepthwiseConv2d.op.axis
s[DepthwiseConv2d].reorder(n, h, w, cc)
c = s[DepthwiseConv2d].fuse(h, w)
fused = s[DepthwiseConv2d].fuse(n, c)

# Pick the widest vector factor that evenly divides the channel count.
if DepthwiseConv2d.shape[1].value % 8 == 0:
    cco, cci = s[DepthwiseConv2d].split(cc, factor=8)
elif DepthwiseConv2d.shape[1].value % 4 == 0:
    cco, cci = s[DepthwiseConv2d].split(cc, factor=4)
else:
    cco, cci = s[DepthwiseConv2d].split(cc, factor=1)
s[DepthwiseConv2d].reorder(fused, cco, cci)
s[DepthwiseConv2d].vectorize(cci)

# Lowered IR after scheduling: note the ramp(base, stride, 8) accesses,
# which become strided vector load/store intrinsics in the LLVM output.
print(tvm.lower(s, [Input, Filter], simple_mode=True))

# --- Build and dump the generated LLVM IR ----------------------------------
m = tvm.build(s, [Input, Filter], target='llvm')
file_path = 'stride_load_store_sample.ll'
with open(file_path, "w") as f:
    f.write(m.get_source())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment