diff options
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 7 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll | 14 |
2 files changed, 19 insertions, 2 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 5d24e92f23..514971f01e 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -4452,8 +4452,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 
       // If we are VDUPing a value that comes directly from a vector, that will
       // cause an unnecessary move to and from a GPR, where instead we could
-      // just use VDUPLANE.
-      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+      // just use VDUPLANE. We can only do this if the lane being extracted
+      // is at a constant index, as the VDUP from lane instructions only have
+      // constant-index forms.
+      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+          isa<ConstantSDNode>(Value->getOperand(1))) {
         // We need to create a new undef vector to use for the VDUPLANE if the
         // size of the vector from which we get the value is different than the
         // size of the vector that we need to create. We will insert the element
diff --git a/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
new file mode 100644
index 0000000000..937ecc0d66
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+
+define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind {
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128]
+; CHECK: add.w r[[ADDR:[0-9]+]], r[[SOURCE]], {{r[0-9]+}}, lsl #2
+; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[ADDR]]:32]
+; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0]
+  %val = extractelement <4 x i32> %phitmp, i32 %lane
+  %r1 = insertelement <4 x i32> undef, i32 %val, i32 1
+  %r2 = insertelement <4 x i32> %r1, i32 %val, i32 2
+  %r3 = insertelement <4 x i32> %r2, i32 %val, i32 3
+  store <4 x i32> %r3, <4 x i32>* %p, align 4
+  ret void
+}