Skip to content

Instantly share code, notes, and snippets.

@BeMg
Last active May 21, 2020 06:18
Show Gist options
  • Save BeMg/e5083dd453054b4a0704e4a04f4a572f to your computer and use it in GitHub Desktop.
Save BeMg/e5083dd453054b4a0704e4a04f4a572f to your computer and use it in GitHub Desktop.
temp
; Function Attrs: nounwind readnone uwtable
;
; Four-lane single-precision sine; the mangled name decodes to
; _cl_sin(float __vector(4)).
;
; Outline of the body:
;   1. Take |x| and form a per-lane multiple q = trunc(|x| * C + 0.5), where
;      C = 0x3FE45F3060000000 (approximately 2/pi).
;   2. Subtract q times three successively smaller constants (together
;      approximately pi/2) from |x|, carrying the rounding error of each
;      step, giving a reduced argument (stored to %r0) and a correction
;      term (stored to %r1).
;   3. If any lane has |x| >= 2^23, call the out-of-line large-argument
;      reduction, which rewrites %r0/%r1 and returns q.
;   4. Evaluate an odd and an even polynomial in the reduced argument and
;      pick one per lane from the low bit of q.
;   5. Apply the sign, return a NaN bit pattern for Inf/NaN input, and
;      return x itself when x compares equal to zero.
define <4 x float> @_Z7_cl_sinDv4_f(<4 x float> %x) local_unnamed_addr #2 {
entry:
; Stack slots handed to the large-argument helper by pointer.
%r0 = alloca <4 x float>, align 16
%r1 = alloca <4 x float>, align 16
; %astype1 = |x|: clear bit 31 of every lane (mask 0x7FFFFFFF).
%astype = bitcast <4 x float> %x to <4 x i32>
%and = and <4 x i32> %astype, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%astype1 = bitcast <4 x i32> %and to <4 x float>
%0 = bitcast <4 x float>* %r0 to i8*
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) #10
%1 = bitcast <4 x float>* %r1 to i8*
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %1) #10
; %2 = |x| * C + 0.5, with C approximately 2/pi.
%2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1, <4 x float> <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>) #10
; %10 = %2 truncated toward zero. The integer part is obtained through a
; float -> int -> float round trip; lanes whose magnitude is >= 2^23
; (0x4160000000000000) or infinite are passed through unchanged. The i64
; masks 0x7FFFFFFF7FFFFFFF / 0x8000000080000000 clear / extract the sign
; bit of each 32-bit lane so the result keeps the sign of %2.
%3 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %2) #10
%conv.i.i.i.i.i.i.i = sitofp <4 x i32> %3 to <4 x float>
%sub.i.i19.i.i.i.i.i = fsub <4 x float> %2, %conv.i.i.i.i.i.i.i
%4 = bitcast <4 x float> %2 to <2 x i64>
%and.i.i.i.i.i.i.i.i.i = and <2 x i64> %4, <i64 9223372034707292159, i64 9223372034707292159>
%5 = bitcast <2 x i64> %and.i.i.i.i.i.i.i.i.i to <4 x float>
%6 = fcmp oeq <4 x float> %5, <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
%7 = fcmp oge <4 x float> %5, <float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000>
%or.i.i2021.i.i.i.i.i = or <4 x i1> %7, %6
%sub.i.i.i.i.i.i.i = fsub <4 x float> %2, %sub.i.i19.i.i.i.i.i
%8 = bitcast <4 x float> %sub.i.i.i.i.i.i.i to <2 x i64>
%and.i.i10.i.i.i.i.i.i = and <2 x i64> %8, <i64 9223372034707292159, i64 9223372034707292159>
%and.i.i.i.i.i.i.i.i = and <2 x i64> %4, <i64 -9223372034707292160, i64 -9223372034707292160>
%xor.i.i11.i.i.i.i.i.i = or <2 x i64> %and.i.i10.i.i.i.i.i.i, %and.i.i.i.i.i.i.i.i
%9 = bitcast <2 x i64> %xor.i.i11.i.i.i.i.i.i to <4 x float>
%10 = select <4 x i1> %or.i.i2021.i.i.i.i.i, <4 x float> %2, <4 x float> %9
; Split %10 into a high part (low 12 bits cleared by the 0xFFFFF000 mask)
; and the remaining low part, for the split products below.
%astype.i.i.i = bitcast <4 x float> %10 to <4 x i32>
%and.i.i.i = and <4 x i32> %astype.i.i.i, <i32 -4096, i32 -4096, i32 -4096, i32 -4096>
%astype1.i.i.i = bitcast <4 x i32> %and.i.i.i to <4 x float>
%sub.i.i.i = fsub <4 x float> %10, %astype1.i.i.i
; Step 1: product of %10 with the leading constant 0x3FF921FB40000000
; (approximately pi/2). The fma chain multiplies the high/low parts of %10
; by the two halves of that constant (0x3FF92... and 0x3F3FB4...) and
; subtracts the rounded product, leaving its rounding error in %14.
%mul.i.i.i = fmul <4 x float> %10, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
%fneg.i.i.i = fneg <4 x float> %mul.i.i.i
%11 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000>, <4 x float> %fneg.i.i.i) #10
%12 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000>, <4 x float> %11) #10
%13 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000>, <4 x float> %12) #10
%14 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000>, <4 x float> %13) #10
; |x| - product, with the subtraction's own rounding error and %14 folded
; back in; %add.i.i is the running reduced value.
%sub.i.i = fsub <4 x float> %astype1, %mul.i.i.i
%sub2.i.i = fsub <4 x float> %astype1, %sub.i.i
%sub3.i.i = fsub <4 x float> %sub2.i.i, %mul.i.i.i
%sub4.i.i = fsub <4 x float> %sub3.i.i, %14
%add.i.i = fadd <4 x float> %sub.i.i, %sub4.i.i
; Step 2: same pattern with the next, smaller constant 0x3E74442D00000000.
%mul.i44.i.i = fmul <4 x float> %10, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
%fneg.i45.i.i = fneg <4 x float> %mul.i44.i.i
%15 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000>, <4 x float> %fneg.i45.i.i) #10
%16 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000>, <4 x float> %15) #10
%17 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000>, <4 x float> %16) #10
%18 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000>, <4 x float> %17) #10
%sub5.i.i = fsub <4 x float> %add.i.i, %mul.i44.i.i
%sub6.i.i = fsub <4 x float> %add.i.i, %sub5.i.i
%sub7.i.i = fsub <4 x float> %sub6.i.i, %mul.i44.i.i
%sub8.i.i = fsub <4 x float> %sub7.i.i, %18
%add9.i.i = fadd <4 x float> %sub5.i.i, %sub8.i.i
; Step 3: same pattern with the third constant 0x3CF8469880000000. Here the
; product's rounding error (%22) is not folded into the sum; it is negated
; and kept separately as the correction term written to %r1.
%mul.i54.i.i = fmul <4 x float> %10, <float 0x3CF8469880000000, float 0x3CF8469880000000, float 0x3CF8469880000000, float 0x3CF8469880000000>
%fneg.i55.i.i = fneg <4 x float> %mul.i54.i.i
%19 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000>, <4 x float> %fneg.i55.i.i) #10
%20 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000>, <4 x float> %19) #10
%21 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000>, <4 x float> %20) #10
%22 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000>, <4 x float> %21) #10
%sub10.i.i = fsub <4 x float> %add9.i.i, %mul.i54.i.i
%sub11.i.i = fsub <4 x float> %add9.i.i, %sub10.i.i
%sub12.i.i = fsub <4 x float> %sub11.i.i, %mul.i54.i.i
%add13.i.i = fadd <4 x float> %sub10.i.i, %sub12.i.i
; Publish the small-argument results: %r0 = reduced argument, %r1 = tail.
store <4 x float> %add13.i.i, <4 x float>* %r0, align 16, !tbaa !6
%fneg.i.i = fneg <4 x float> %22
store <4 x float> %fneg.i.i, <4 x float>* %r1, align 16, !tbaa !6
; Lane mask for |x| >= 2^23, sign-extended to all-ones per true lane. The
; mask is viewed as two 64-bit halves; here lanes 0 and 1 are OR-ed and the
; sign bit is tested (sgt -1 means neither lane is set).
%cmp.i = fcmp oge <4 x float> %astype1, <float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000>
%sext.i = sext <4 x i1> %cmp.i to <4 x i32>
%.bc.i.i = bitcast <4 x i32> %sext.i to <2 x double>
%.extract.i.i = extractelement <2 x double> %.bc.i.i, i32 0
%23 = bitcast double %.extract.i.i to <2 x i32>
%24 = extractelement <2 x i32> %23, i64 0
%25 = extractelement <2 x i32> %23, i64 1
%26 = or i32 %24, %25
%27 = icmp sgt i32 %26, -1
br i1 %27, label %_Z7_cl_anyDv4_i.exit.i, label %if.then.i
_Z7_cl_anyDv4_i.exit.i: ; preds = %entry
; q for the small-argument path: integer value of %10, low two bits only.
%28 = fptosi <4 x float> %10 to <4 x i32>
%and.i.i = and <4 x i32> %28, <i32 3, i32 3, i32 3, i32 3>
; Same large-lane test for lanes 2 and 3.
%.extract6.i.i = extractelement <2 x double> %.bc.i.i, i32 1
%29 = bitcast double %.extract6.i.i to <2 x i32>
%30 = extractelement <2 x i32> %29, i64 0
%31 = extractelement <2 x i32> %29, i64 1
%32 = or i32 %31, %30
%tobool.i = icmp sgt i32 %32, -1
br i1 %tobool.i, label %_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit, label %if.then.i
if.then.i: ; preds = %_Z7_cl_anyDv4_i.exit.i, %entry
; At least one lane is large: the helper receives %r0, %r1 and |x|, and its
; <4 x i32> return value replaces q. %r0/%r1 are reloaded afterwards.
%call3.i = call <4 x i32> @_Z25__pocl_argReductionLargeSPU9CLprivateDv4_fS1_S_(<4 x float>* nonnull %r0, <4 x float>* nonnull %r1, <4 x float> %astype1) #10
%.pre = load <4 x float>, <4 x float>* %r0, align 16, !tbaa !6
%.pre6 = load <4 x float>, <4 x float>* %r1, align 16, !tbaa !6
br label %_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit
_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit: ; preds = %if.then.i, %_Z7_cl_anyDv4_i.exit.i
; %34 = reduced argument r, %33 = correction term, %retval1.0.i = q.
%33 = phi <4 x float> [ %.pre6, %if.then.i ], [ %fneg.i.i, %_Z7_cl_anyDv4_i.exit.i ]
%34 = phi <4 x float> [ %.pre, %if.then.i ], [ %add13.i.i, %_Z7_cl_anyDv4_i.exit.i ]
%retval1.0.i = phi <4 x i32> [ %call3.i, %if.then.i ], [ %and.i.i, %_Z7_cl_anyDv4_i.exit.i ]
; %mul.i = r^2, %mul1.i = r^3.
%mul.i = fmul <4 x float> %34, %34
%mul1.i = fmul <4 x float> %34, %mul.i
; Odd (sine-like) result %sub.i: Horner polynomial in r^2 (%35..%38),
; combined with the correction term and r^3 * 0x3FC5555560000000
; (approximately 1/6), then subtracted from r.
%35 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 0x3DE5D93A60000000, float 0x3DE5D93A60000000, float 0x3DE5D93A60000000, float 0x3DE5D93A60000000>, <4 x float> <float 0xBE5AE5E680000000, float 0xBE5AE5E680000000, float 0xBE5AE5E680000000, float 0xBE5AE5E680000000>) #10
%36 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %35, <4 x float> <float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000>) #10
%37 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %36, <4 x float> <float 0xBF2A013A80000000, float 0xBF2A013A80000000, float 0xBF2A013A80000000, float 0xBF2A013A80000000>) #10
%38 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %37, <4 x float> <float 0x3F811110E0000000, float 0x3F811110E0000000, float 0x3F811110E0000000, float 0x3F811110E0000000>) #10
%fneg.i = fneg <4 x float> %mul1.i
%mul5.i = fmul <4 x float> %38, %fneg.i
%39 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %33, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <4 x float> %mul5.i) #10
%fneg7.i = fneg <4 x float> %33
%40 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %39, <4 x float> %fneg7.i) #10
%41 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul1.i, <4 x float> <float 0x3FC5555560000000, float 0x3FC5555560000000, float 0x3FC5555560000000, float 0x3FC5555560000000>, <4 x float> %40) #10
%sub.i = fsub <4 x float> %34, %41
; Even (cosine-like) result %sub21.i: Horner polynomial in r^2 (%42..%46)
; times r^2.
%42 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000>, <4 x float> <float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000>) #10
%43 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %42, <4 x float> <float 0xBE92524740000000, float 0xBE92524740000000, float 0xBE92524740000000, float 0xBE92524740000000>) #10
%44 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %43, <4 x float> <float 0x3EFA015C40000000, float 0x3EFA015C40000000, float 0x3EFA015C40000000, float 0x3EFA015C40000000>) #10
%45 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %44, <4 x float> <float 0xBF56C16C00000000, float 0xBF56C16C00000000, float 0xBF56C16C00000000, float 0xBF56C16C00000000>) #10
%46 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %45, <4 x float> <float 0x3FA5555560000000, float 0x3FA5555560000000, float 0x3FA5555560000000, float 0x3FA5555560000000>) #10
%mul5.i2 = fmul <4 x float> %mul.i, %46
; Offset %49 chosen from the bit pattern of |r|: zero when it is at most
; 0x3E999999, |r| with its exponent lowered by two (integer add of
; -0x01000000) when it is between that and 0x3F480001, and the constant
; 0x3E900000 otherwise. The result is then formed as
; (1 - %49) - ((r^2 * 0.5 - %49) - rest).
%astype.i = bitcast <4 x float> %34 to <4 x i32>
%and.i = and <4 x i32> %astype.i, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%sub.i3 = add nsw <4 x i32> %and.i, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
%cmp.i4 = icmp ugt <4 x i32> %and.i, <i32 1050253721, i32 1050253721, i32 1050253721, i32 1050253721>
%cmp7.i = icmp ult <4 x i32> %and.i, <i32 1061683201, i32 1061683201, i32 1061683201, i32 1061683201>
%and952.i = and <4 x i1> %cmp.i4, %cmp7.i
%47 = select <4 x i1> %and952.i, <4 x i32> %sub.i3, <4 x i32> zeroinitializer
%48 = select <4 x i1> %cmp7.i, <4 x i32> %47, <4 x i32> <i32 1049624576, i32 1049624576, i32 1049624576, i32 1049624576>
%49 = bitcast <4 x i32> %48 to <4 x float>
%fneg.i5 = fneg <4 x float> %49
%50 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <4 x float> %fneg.i5) #10
%sub16.i = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %49
%fneg17.i = fneg <4 x float> %34
%mul18.i = fmul <4 x float> %33, %fneg17.i
%51 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %mul5.i2, <4 x float> %mul18.i) #10
%sub20.i = fsub <4 x float> %50, %51
%sub21.i = fsub <4 x float> %sub16.i, %sub20.i
; Per lane: even q selects the odd-polynomial result, odd q the even one.
%.mask = and <4 x i32> %retval1.0.i, <i32 1, i32 1, i32 1, i32 1>
%52 = icmp eq <4 x i32> %.mask, zeroinitializer
%.v = select <4 x i1> %52, <4 x float> %sub.i, <4 x float> %sub21.i
%53 = bitcast <4 x float> %.v to <4 x i32>
; Sign: flip when q > 1 (signed compare), and flip again by the sign bit of
; the original x.
%cmp = icmp sgt <4 x i32> %retval1.0.i, <i32 1, i32 1, i32 1, i32 1>
%54 = select <4 x i1> %cmp, <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, <4 x i32> zeroinitializer
%xor8 = and <4 x i32> %astype, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
%xor = xor <4 x i32> %54, %xor8
%xor9 = xor <4 x i32> %xor, %53
; Lanes whose |x| bit pattern is >= 0x7F800000 (Inf or NaN) get the bit
; pattern 0x7FC00000 instead of the computed value.
%cmp11 = icmp ult <4 x i32> %and, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
%55 = select <4 x i1> %cmp11, <4 x i32> %xor9, <4 x i32> <i32 2143289344, i32 2143289344, i32 2143289344, i32 2143289344>
; Lanes where x compares equal to zero return the original bits of x.
%cmp15 = fcmp une <4 x float> %x, zeroinitializer
%56 = select <4 x i1> %cmp15, <4 x i32> %55, <4 x i32> %astype
%57 = bitcast <4 x i32> %56 to <4 x float>
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %1) #10
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0) #10
ret <4 x float> %57
}
; Function Attrs: nounwind readnone uwtable
;
; Four-lane single-precision cosine; the mangled name decodes to
; _cl_cos(float __vector(4)).
;
; Outline of the body:
;   1. Take |x| and form a per-lane multiple q = trunc(|x| * C + 0.5), where
;      C = 0x3FE45F3060000000 (approximately 2/pi).
;   2. Subtract q times three successively smaller constants (together
;      approximately pi/2) from |x|, carrying the rounding error of each
;      step, giving a reduced argument (stored to %r0) and a correction
;      term (stored to %r1).
;   3. If any lane has |x| >= 2^23, call the out-of-line large-argument
;      reduction, which rewrites %r0/%r1 and returns q.
;   4. Evaluate an odd and an even polynomial in the reduced argument; even
;      q selects the even-polynomial result, odd q the negated odd one.
;   5. Flip the sign from bit 1 of q, return a NaN bit pattern for Inf/NaN
;      input, and return 1.0 when x compares equal to zero.
define <4 x float> @_Z7_cl_cosDv4_f(<4 x float> %x) local_unnamed_addr #2 {
entry:
; Stack slots handed to the large-argument helper by pointer.
%r0 = alloca <4 x float>, align 16
%r1 = alloca <4 x float>, align 16
; %astype1 = |x|: clear bit 31 of every lane (mask 0x7FFFFFFF).
%astype = bitcast <4 x float> %x to <4 x i32>
%and = and <4 x i32> %astype, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%astype1 = bitcast <4 x i32> %and to <4 x float>
%0 = bitcast <4 x float>* %r0 to i8*
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) #10
%1 = bitcast <4 x float>* %r1 to i8*
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %1) #10
; %2 = |x| * C + 0.5, with C approximately 2/pi.
%2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1, <4 x float> <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>) #10
; %10 = %2 truncated toward zero. The integer part is obtained through a
; float -> int -> float round trip; lanes whose magnitude is >= 2^23
; (0x4160000000000000) or infinite are passed through unchanged. The i64
; masks 0x7FFFFFFF7FFFFFFF / 0x8000000080000000 clear / extract the sign
; bit of each 32-bit lane so the result keeps the sign of %2.
%3 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %2) #10
%conv.i.i.i.i.i.i.i = sitofp <4 x i32> %3 to <4 x float>
%sub.i.i19.i.i.i.i.i = fsub <4 x float> %2, %conv.i.i.i.i.i.i.i
%4 = bitcast <4 x float> %2 to <2 x i64>
%and.i.i.i.i.i.i.i.i.i = and <2 x i64> %4, <i64 9223372034707292159, i64 9223372034707292159>
%5 = bitcast <2 x i64> %and.i.i.i.i.i.i.i.i.i to <4 x float>
%6 = fcmp oeq <4 x float> %5, <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
%7 = fcmp oge <4 x float> %5, <float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000>
%or.i.i2021.i.i.i.i.i = or <4 x i1> %7, %6
%sub.i.i.i.i.i.i.i = fsub <4 x float> %2, %sub.i.i19.i.i.i.i.i
%8 = bitcast <4 x float> %sub.i.i.i.i.i.i.i to <2 x i64>
%and.i.i10.i.i.i.i.i.i = and <2 x i64> %8, <i64 9223372034707292159, i64 9223372034707292159>
%and.i.i.i.i.i.i.i.i = and <2 x i64> %4, <i64 -9223372034707292160, i64 -9223372034707292160>
%xor.i.i11.i.i.i.i.i.i = or <2 x i64> %and.i.i10.i.i.i.i.i.i, %and.i.i.i.i.i.i.i.i
%9 = bitcast <2 x i64> %xor.i.i11.i.i.i.i.i.i to <4 x float>
%10 = select <4 x i1> %or.i.i2021.i.i.i.i.i, <4 x float> %2, <4 x float> %9
; Split %10 into a high part (low 12 bits cleared by the 0xFFFFF000 mask)
; and the remaining low part, for the split products below.
%astype.i.i.i = bitcast <4 x float> %10 to <4 x i32>
%and.i.i.i = and <4 x i32> %astype.i.i.i, <i32 -4096, i32 -4096, i32 -4096, i32 -4096>
%astype1.i.i.i = bitcast <4 x i32> %and.i.i.i to <4 x float>
%sub.i.i.i = fsub <4 x float> %10, %astype1.i.i.i
; Step 1: product of %10 with the leading constant 0x3FF921FB40000000
; (approximately pi/2). The fma chain multiplies the high/low parts of %10
; by the two halves of that constant (0x3FF92... and 0x3F3FB4...) and
; subtracts the rounded product, leaving its rounding error in %14.
%mul.i.i.i = fmul <4 x float> %10, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
%fneg.i.i.i = fneg <4 x float> %mul.i.i.i
%11 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000>, <4 x float> %fneg.i.i.i) #10
%12 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000>, <4 x float> %11) #10
%13 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000>, <4 x float> %12) #10
%14 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000>, <4 x float> %13) #10
; |x| - product, with the subtraction's own rounding error and %14 folded
; back in; %add.i.i is the running reduced value.
%sub.i.i = fsub <4 x float> %astype1, %mul.i.i.i
%sub2.i.i = fsub <4 x float> %astype1, %sub.i.i
%sub3.i.i = fsub <4 x float> %sub2.i.i, %mul.i.i.i
%sub4.i.i = fsub <4 x float> %sub3.i.i, %14
%add.i.i = fadd <4 x float> %sub.i.i, %sub4.i.i
; Step 2: same pattern with the next, smaller constant 0x3E74442D00000000.
%mul.i44.i.i = fmul <4 x float> %10, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
%fneg.i45.i.i = fneg <4 x float> %mul.i44.i.i
%15 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000>, <4 x float> %fneg.i45.i.i) #10
%16 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000>, <4 x float> %15) #10
%17 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000>, <4 x float> %16) #10
%18 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000>, <4 x float> %17) #10
%sub5.i.i = fsub <4 x float> %add.i.i, %mul.i44.i.i
%sub6.i.i = fsub <4 x float> %add.i.i, %sub5.i.i
%sub7.i.i = fsub <4 x float> %sub6.i.i, %mul.i44.i.i
%sub8.i.i = fsub <4 x float> %sub7.i.i, %18
%add9.i.i = fadd <4 x float> %sub5.i.i, %sub8.i.i
; Step 3: same pattern with the third constant 0x3CF8469880000000. Here the
; product's rounding error (%22) is not folded into the sum; it is negated
; and kept separately as the correction term written to %r1.
%mul.i54.i.i = fmul <4 x float> %10, <float 0x3CF8469880000000, float 0x3CF8469880000000, float 0x3CF8469880000000, float 0x3CF8469880000000>
%fneg.i55.i.i = fneg <4 x float> %mul.i54.i.i
%19 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000>, <4 x float> %fneg.i55.i.i) #10
%20 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000>, <4 x float> %19) #10
%21 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000>, <4 x float> %20) #10
%22 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000>, <4 x float> %21) #10
%sub10.i.i = fsub <4 x float> %add9.i.i, %mul.i54.i.i
%sub11.i.i = fsub <4 x float> %add9.i.i, %sub10.i.i
%sub12.i.i = fsub <4 x float> %sub11.i.i, %mul.i54.i.i
%add13.i.i = fadd <4 x float> %sub10.i.i, %sub12.i.i
; Publish the small-argument results: %r0 = reduced argument, %r1 = tail.
store <4 x float> %add13.i.i, <4 x float>* %r0, align 16, !tbaa !6
%fneg.i.i = fneg <4 x float> %22
store <4 x float> %fneg.i.i, <4 x float>* %r1, align 16, !tbaa !6
; Lane mask for |x| >= 2^23, sign-extended to all-ones per true lane. The
; mask is viewed as two 64-bit halves; here lanes 0 and 1 are OR-ed and the
; sign bit is tested (sgt -1 means neither lane is set).
%cmp.i = fcmp oge <4 x float> %astype1, <float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000>
%sext.i = sext <4 x i1> %cmp.i to <4 x i32>
%.bc.i.i = bitcast <4 x i32> %sext.i to <2 x double>
%.extract.i.i = extractelement <2 x double> %.bc.i.i, i32 0
%23 = bitcast double %.extract.i.i to <2 x i32>
%24 = extractelement <2 x i32> %23, i64 0
%25 = extractelement <2 x i32> %23, i64 1
%26 = or i32 %24, %25
%27 = icmp sgt i32 %26, -1
br i1 %27, label %_Z7_cl_anyDv4_i.exit.i, label %if.then.i
_Z7_cl_anyDv4_i.exit.i: ; preds = %entry
; q for the small-argument path: integer value of %10, low two bits only.
%28 = fptosi <4 x float> %10 to <4 x i32>
%and.i.i = and <4 x i32> %28, <i32 3, i32 3, i32 3, i32 3>
; Same large-lane test for lanes 2 and 3.
%.extract6.i.i = extractelement <2 x double> %.bc.i.i, i32 1
%29 = bitcast double %.extract6.i.i to <2 x i32>
%30 = extractelement <2 x i32> %29, i64 0
%31 = extractelement <2 x i32> %29, i64 1
%32 = or i32 %31, %30
%tobool.i = icmp sgt i32 %32, -1
br i1 %tobool.i, label %_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit, label %if.then.i
if.then.i: ; preds = %_Z7_cl_anyDv4_i.exit.i, %entry
; At least one lane is large: the helper receives %r0, %r1 and |x|, and its
; <4 x i32> return value replaces q. %r0/%r1 are reloaded afterwards.
%call3.i = call <4 x i32> @_Z25__pocl_argReductionLargeSPU9CLprivateDv4_fS1_S_(<4 x float>* nonnull %r0, <4 x float>* nonnull %r1, <4 x float> %astype1) #10
%.pre = load <4 x float>, <4 x float>* %r0, align 16, !tbaa !6
%.pre6 = load <4 x float>, <4 x float>* %r1, align 16, !tbaa !6
br label %_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit
_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit: ; preds = %if.then.i, %_Z7_cl_anyDv4_i.exit.i
; %34 = reduced argument r, %33 = correction term, %retval1.0.i = q.
%33 = phi <4 x float> [ %.pre6, %if.then.i ], [ %fneg.i.i, %_Z7_cl_anyDv4_i.exit.i ]
%34 = phi <4 x float> [ %.pre, %if.then.i ], [ %add13.i.i, %_Z7_cl_anyDv4_i.exit.i ]
%retval1.0.i = phi <4 x i32> [ %call3.i, %if.then.i ], [ %and.i.i, %_Z7_cl_anyDv4_i.exit.i ]
; %mul.i = r^2, %mul1.i = r^3.
%mul.i = fmul <4 x float> %34, %34
%mul1.i = fmul <4 x float> %34, %mul.i
; Odd (sine-like) result %sub.i: Horner polynomial in r^2 (%35..%38),
; combined with the correction term and r^3 * 0x3FC5555560000000
; (approximately 1/6), then subtracted from r. %fneg is its negation.
%35 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 0x3DE5D93A60000000, float 0x3DE5D93A60000000, float 0x3DE5D93A60000000, float 0x3DE5D93A60000000>, <4 x float> <float 0xBE5AE5E680000000, float 0xBE5AE5E680000000, float 0xBE5AE5E680000000, float 0xBE5AE5E680000000>) #10
%36 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %35, <4 x float> <float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000>) #10
%37 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %36, <4 x float> <float 0xBF2A013A80000000, float 0xBF2A013A80000000, float 0xBF2A013A80000000, float 0xBF2A013A80000000>) #10
%38 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %37, <4 x float> <float 0x3F811110E0000000, float 0x3F811110E0000000, float 0x3F811110E0000000, float 0x3F811110E0000000>) #10
%fneg.i = fneg <4 x float> %mul1.i
%mul5.i = fmul <4 x float> %38, %fneg.i
%39 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %33, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <4 x float> %mul5.i) #10
%fneg7.i = fneg <4 x float> %33
%40 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %39, <4 x float> %fneg7.i) #10
%41 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul1.i, <4 x float> <float 0x3FC5555560000000, float 0x3FC5555560000000, float 0x3FC5555560000000, float 0x3FC5555560000000>, <4 x float> %40) #10
%sub.i = fsub <4 x float> %34, %41
%fneg = fneg <4 x float> %sub.i
; Even (cosine-like) result %sub21.i: Horner polynomial in r^2 (%42..%46)
; times r^2.
%42 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000>, <4 x float> <float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000>) #10
%43 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %42, <4 x float> <float 0xBE92524740000000, float 0xBE92524740000000, float 0xBE92524740000000, float 0xBE92524740000000>) #10
%44 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %43, <4 x float> <float 0x3EFA015C40000000, float 0x3EFA015C40000000, float 0x3EFA015C40000000, float 0x3EFA015C40000000>) #10
%45 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %44, <4 x float> <float 0xBF56C16C00000000, float 0xBF56C16C00000000, float 0xBF56C16C00000000, float 0xBF56C16C00000000>) #10
%46 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %45, <4 x float> <float 0x3FA5555560000000, float 0x3FA5555560000000, float 0x3FA5555560000000, float 0x3FA5555560000000>) #10
%mul5.i2 = fmul <4 x float> %mul.i, %46
; Offset %49 chosen from the bit pattern of |r|: zero when it is at most
; 0x3E999999, |r| with its exponent lowered by two (integer add of
; -0x01000000) when it is between that and 0x3F480001, and the constant
; 0x3E900000 otherwise. The result is then formed as
; (1 - %49) - ((r^2 * 0.5 - %49) - rest).
%astype.i = bitcast <4 x float> %34 to <4 x i32>
%and.i = and <4 x i32> %astype.i, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%sub.i3 = add nsw <4 x i32> %and.i, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
%cmp.i4 = icmp ugt <4 x i32> %and.i, <i32 1050253721, i32 1050253721, i32 1050253721, i32 1050253721>
%cmp7.i = icmp ult <4 x i32> %and.i, <i32 1061683201, i32 1061683201, i32 1061683201, i32 1061683201>
%and952.i = and <4 x i1> %cmp.i4, %cmp7.i
%47 = select <4 x i1> %and952.i, <4 x i32> %sub.i3, <4 x i32> zeroinitializer
%48 = select <4 x i1> %cmp7.i, <4 x i32> %47, <4 x i32> <i32 1049624576, i32 1049624576, i32 1049624576, i32 1049624576>
%49 = bitcast <4 x i32> %48 to <4 x float>
%fneg.i5 = fneg <4 x float> %49
%50 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <4 x float> %fneg.i5) #10
%sub16.i = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %49
%fneg17.i = fneg <4 x float> %34
%mul18.i = fmul <4 x float> %33, %fneg17.i
%51 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %mul5.i2, <4 x float> %mul18.i) #10
%sub20.i = fsub <4 x float> %50, %51
%sub21.i = fsub <4 x float> %sub16.i, %sub20.i
; Per lane: even q selects the even-polynomial result, odd q the negated
; odd-polynomial result.
%.mask = and <4 x i32> %retval1.0.i, <i32 1, i32 1, i32 1, i32 1>
%52 = icmp eq <4 x i32> %.mask, zeroinitializer
%.v = select <4 x i1> %52, <4 x float> %sub21.i, <4 x float> %fneg
%53 = bitcast <4 x float> %.v to <4 x i32>
; Sign: (q >> 1) << 31 moves bit 1 of q into the sign-bit position (higher
; bits are shifted out); XOR flips the result's sign when that bit is set.
%shr29 = lshr <4 x i32> %retval1.0.i, <i32 1, i32 1, i32 1, i32 1>
%shl4 = shl <4 x i32> %shr29, <i32 31, i32 31, i32 31, i32 31>
%xor = xor <4 x i32> %shl4, %53
; Lanes whose |x| bit pattern is >= 0x7F800000 (Inf or NaN) get the bit
; pattern 0x7FC00000 instead of the computed value.
%cmp = icmp ult <4 x i32> %and, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
%54 = select <4 x i1> %cmp, <4 x i32> %xor, <4 x i32> <i32 2143289344, i32 2143289344, i32 2143289344, i32 2143289344>
; Lanes where x compares equal to zero return exactly 1.0.
%cmp10 = fcmp une <4 x float> %x, zeroinitializer
%55 = bitcast <4 x i32> %54 to <4 x float>
%56 = select <4 x i1> %cmp10, <4 x float> %55, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %1) #10
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0) #10
ret <4 x float> %56
}
/*
 * Runs the float4 sin() and cos() built-ins on volatile inputs and prints
 * a message when either result differs from a fixed comparison vector.
 *
 * The comparison vectors are not the sine/cosine of the inputs, so the
 * printf is expected to run; judging by its text, the comparison exists
 * to give the function an observable side effect.
 */
__kernel
void test_sin_cos() {
  volatile float4 sin_input = (float4)(3.0f, 5.0f, -2.0f, -9.0f);
  volatile float4 cos_input = (float4)(2.0f, -4.4f, -1.0f, -20.0f);

  const float4 sin_result = sin(sin_input);
  const float4 cos_result = cos(cos_input);

  const float4 sin_reference = (float4)(3.0f, 5.0f, -1.0f, -9.0f);
  const float4 cos_reference = (float4)(2.0f, -4.4f, -2.0f, -20.0f);

  const int sin_differs = any(sin_result != sin_reference);
  const int cos_differs = any(cos_result != cos_reference);

  if (sin_differs || cos_differs) {
    printf("Make sure this function has side effect.\n");
  }
}
/*
 * Self-check for the float4 max() and min() built-ins.
 *
 * Both inputs are volatile. The reference vectors are the lane-wise
 * maximum and minimum of the two inputs, so the message is printed only
 * when a built-in returns an unexpected lane.
 */
__kernel
void test_min_max() {
  volatile float4 lhs = (float4)(3.0f, 5.0f, -2.0f, -9.0f);
  volatile float4 rhs = (float4)(2.0f, -4.4f, -1.0f, -20.0f);

  const float4 largest = max(lhs, rhs);
  const float4 smallest = min(lhs, rhs);

  const float4 expected_max = (float4)(3.0f, 5.0f, -1.0f, -9.0f);
  const float4 expected_min = (float4)(2.0f, -4.4f, -2.0f, -20.0f);

  const int max_wrong = any(largest != expected_max);
  const int min_wrong = any(smallest != expected_min);

  if (max_wrong || min_wrong) {
    printf("min or max on float4 failed.\n");
  }
}
/*
 * Element-wise integer addition.
 *
 * Each work-item first runs the two self-test helpers, then writes
 * in1[gid] + in2[gid] to out[gid], where gid is the work-item's global id
 * in dimension 0.
 */
__kernel void vec_add(__global int *out,
                      __global const int *in1,
                      __global const int *in2) {
  const int gid = get_global_id(0);

  test_sin_cos();
  test_min_max();

  const int sum = in1[gid] + in2[gid];
  out[gid] = sum;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment