diff options
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 22 | ||||
-rw-r--r-- | test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll | 302 |
2 files changed, 324 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2045073fb6..229d8cf138 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -468,13 +468,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any arithmetic operations on it. + // The same applies to v4f32. But keep in mind that vadd, vsub, vmul are natively + // supported for v4f32. setOperationAction(ISD::FADD, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); + // FIXME: Code duplication: FDIV and FREM are always expanded, see + // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::FDIV, MVT::v2f64, Expand); setOperationAction(ISD::FREM, MVT::v2f64, Expand); + // FIXME: Create a unit test. + // In other words, find a case where "copysign" appears in the DAG with + // vector operands. setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); + // FIXME: Code duplication: SETCC has custom operation action, see + // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::SETCC, MVT::v2f64, Expand); + // FIXME: Create a unit test for FNEG and FABS. setOperationAction(ISD::FNEG, MVT::v2f64, Expand); setOperationAction(ISD::FABS, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); @@ -487,11 +497,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); setOperationAction(ISD::FEXP, MVT::v2f64, Expand); setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); + // FIXME: Create a unit test for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. 
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); + setOperationAction(ISD::FSIN, MVT::v4f32, Expand); + setOperationAction(ISD::FCOS, MVT::v4f32, Expand); + setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); + setOperationAction(ISD::FPOW, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); + setOperationAction(ISD::FEXP, MVT::v4f32, Expand); + setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll new file mode 100644 index 0000000000..099221f9d4 --- /dev/null +++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -0,0 +1,302 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s + +@A = global <4 x float> <float 0., float 1., float 2., float 3.> + +define void @test_sqrt(<4 x float>* %X) nounwind { + +; CHECK: test_sqrt: + +; CHECK: movw r1, :lower16:{{.*}} +; CHECK-NEXT: movt r1, :upper16:{{.*}} +; CHECK: vldmia r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]} +; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short3]] +; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short2]] +; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short1]] +; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short0]] +; CHECK-NEXT: vstmia {{.*}} + +L.entry: + %0 = load <4 x float>* @A, align 16 + %1 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* %X, align 16 + ret void +} + +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind 
readonly + + +define void @test_cos(<4 x float>* %X) nounwind { + +; CHECK: test_cos: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK-NEXT: movt [[reg0]], :upper16:{{.*}} +; CHECK: vldmia [[reg0]], {{.*}} + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl cosf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl cosf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl cosf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl cosf + +; CHECK: vstmia {{.*}} + +L.entry: + %0 = load <4 x float>* @A, align 16 + %1 = call <4 x float> @llvm.cos.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* %X, align 16 + ret void +} + +declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly + +define void @test_exp(<4 x float>* %X) nounwind { + +; CHECK: test_exp: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK-NEXT: movt [[reg0]], :upper16:{{.*}} +; CHECK: vldmia [[reg0]], {{.*}} + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl expf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl expf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl expf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl expf + +; CHECK: vstmia {{.*}} + +L.entry: + %0 = load <4 x float>* @A, align 16 + %1 = call <4 x float> @llvm.exp.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* %X, align 16 + ret void +} + +declare <4 x float> @llvm.exp.v4f32(<4 x float>) nounwind readonly + +define void @test_exp2(<4 x float>* %X) nounwind { + +; CHECK: test_exp2: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK-NEXT: movt [[reg0]], :upper16:{{.*}} +; CHECK: vldmia [[reg0]], {{.*}} + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl exp2f + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl exp2f + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl exp2f + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl exp2f + +; CHECK: vstmia {{.*}} + +L.entry: + %0 = load <4 x float>* @A, align 16 + %1 = call 
<4 x float> @llvm.exp2.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* %X, align 16 + ret void +} + +declare <4 x float> @llvm.exp2.v4f32(<4 x float>) nounwind readonly + +define void @test_log10(<4 x float>* %X) nounwind { + +; CHECK: test_log10: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK-NEXT: movt [[reg0]], :upper16:{{.*}} +; CHECK: vldmia [[reg0]], {{.*}} + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl log10f + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl log10f + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl log10f + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl log10f + +; CHECK: vstmia {{.*}} + +L.entry: + %0 = load <4 x float>* @A, align 16 + %1 = call <4 x float> @llvm.log10.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* %X, align 16 + ret void +} + +declare <4 x float> @llvm.log10.v4f32(<4 x float>) nounwind readonly + +define void @test_log(<4 x float>* %X) nounwind { + +; CHECK: test_log: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK-NEXT: movt [[reg0]], :upper16:{{.*}} +; CHECK: vldmia [[reg0]], {{.*}} + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl logf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl logf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl logf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl logf + +; CHECK: vstmia {{.*}} + +L.entry: + %0 = load <4 x float>* @A, align 16 + %1 = call <4 x float> @llvm.log.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* %X, align 16 + ret void +} + +declare <4 x float> @llvm.log.v4f32(<4 x float>) nounwind readonly + +define void @test_log2(<4 x float>* %X) nounwind { + +; CHECK: test_log2: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK-NEXT: movt [[reg0]], :upper16:{{.*}} +; CHECK: vldmia [[reg0]], {{.*}} + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl log2f + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl log2f + +; CHECK: {{[v]?mov}} r0, 
{{[r|s][0-9]+}} +; CHECK: bl log2f + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl log2f + +; CHECK: vstmia {{.*}} + +L.entry: + %0 = load <4 x float>* @A, align 16 + %1 = call <4 x float> @llvm.log2.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* %X, align 16 + ret void +} + +declare <4 x float> @llvm.log2.v4f32(<4 x float>) nounwind readonly + + +define void @test_pow(<4 x float>* %X) nounwind { + +; CHECK: test_pow: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK-NEXT: movt [[reg0]], :upper16:{{.*}} +; CHECK: vldmia [[reg0]], {{.*}} + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl powf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl powf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl powf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl powf + +; CHECK: vstmia {{.*}} + +L.entry: + + %0 = load <4 x float>* @A, align 16 + %1 = call <4 x float> @llvm.pow.v4f32(<4 x float> %0, <4 x float> <float 2., float 2., float 2., float 2.>) + + store <4 x float> %1, <4 x float>* %X, align 16 + + ret void +} + +declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) nounwind readonly + +define void @test_powi(<4 x float>* %X) nounwind { + +; CHECK: test_powi: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK-NEXT: movt [[reg0]], :upper16:{{.*}} +; CHECK-NEXT: vldmia [[reg0]], {{.*}} +; CHECK: vmul.f32 {{.*}} + +; CHECK: vstmia {{.*}} + +L.entry: + + %0 = load <4 x float>* @A, align 16 + %1 = call <4 x float> @llvm.powi.v4f32(<4 x float> %0, i32 2) + + store <4 x float> %1, <4 x float>* %X, align 16 + + ret void +} + +declare <4 x float> @llvm.powi.v4f32(<4 x float>, i32) nounwind readonly + +define void @test_sin(<4 x float>* %X) nounwind { + +; CHECK: test_sin: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK-NEXT: movt [[reg0]], :upper16:{{.*}} +; CHECK: vldmia [[reg0]], {{.*}} + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl sinf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} 
+; CHECK: bl sinf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl sinf + +; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: bl sinf + +; CHECK: vstmia {{.*}} + +L.entry: + %0 = load <4 x float>* @A, align 16 + %1 = call <4 x float> @llvm.sin.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* %X, align 16 + ret void +} + +declare <4 x float> @llvm.sin.v4f32(<4 x float>) nounwind readonly + |