Add target-specific DAG combiner for BUILD_VECTOR and VMOVRRD.

An i64 value should be in GPRs when it's going to be used as a scalar, and we use VMOVRRD to make that happen, but if the value is converted back to a vector we need to fold to a simple bit_convert. Radar 8407927. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@114233 91177308-0d34-0410-b5e6-96231b3b80d8
author: Bob Wilson <bob.wilson@apple.com> 2010-09-17 22:59:05 +0000
committer: Bob Wilson <bob.wilson@apple.com> 2010-09-17 22:59:05 +0000
commit: 75f0288b7dff1fcb24d9d61ea3e9547ba21382e4 (patch)
tree: 5091aa136523197a9f73e5a2185e09850c683355
parent: e6be85e9ff6bd28c599421a120a8491257c13ebd (diff)
2 files changed, 53 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index fbfb472d51..3111ff598b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -326,6 +326,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setTargetDAGCombine(ISD::ZERO_EXTEND);
     setTargetDAGCombine(ISD::ANY_EXTEND);
     setTargetDAGCombine(ISD::SELECT_CC);
+    setTargetDAGCombine(ISD::BUILD_VECTOR);
   }
 
   computeRegisterProperties();
@@ -4342,6 +4343,31 @@ static SDValue PerformORCombine(SDNode *N,
   return SDValue();
 }
 
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR; yields an empty SDValue when nothing is folded.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG) {
+  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
+  // into a pair of GPRs, which is fine when the value is used as a scalar,
+  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+  if (N->getNumOperands() == 2) {  // Only the two-operand form is handled.
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+    if (Op0.getOpcode() == ISD::BIT_CONVERT)  // Look through one cast.
+      Op0 = Op0.getOperand(0);
+    if (Op1.getOpcode() == ISD::BIT_CONVERT)  // Likewise for operand 1.
+      Op1 = Op1.getOperand(0);
+    if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+        Op0.getNode() == Op1.getNode() &&  // Same node feeds both operands.
+        Op0.getResNo() == 0 && Op1.getResNo() == 1) {  // Results in order.
+      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+                         N->getValueType(0), Op0.getOperand(0));
+    }
+  }
+
+  return SDValue();
+}
+
 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
 /// ARMISD::VMOVRRD.
 static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -4760,6 +4786,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SUB:        return PerformSUBCombine(N, DCI);
   case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
   case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
+  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG);
   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
diff --git a/test/CodeGen/ARM/2010-09-17-vmovrrd-combine.ll b/test/CodeGen/ARM/2010-09-17-vmovrrd-combine.ll
new file mode 100644
index 0000000000..a210986c0b
--- /dev/null
+++ b/test/CodeGen/ARM/2010-09-17-vmovrrd-combine.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; Radar 8407927: Make sure that VMOVRRD gets optimized away when the result is
+; converted back to be used as a vector type.
+
+; CHECK: test:
+define <4 x i32> @test() nounwind {
+entry:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  %0 = bitcast <2 x i64> zeroinitializer to <2 x double>
+  %1 = extractelement <2 x double> %0, i32 0
+  %2 = bitcast double %1 to i64
+  %3 = insertelement <1 x i64> undef, i64 %2, i32 0
+; CHECK-NOT: vmov s
+; CHECK: vext.8
+  %4 = shufflevector <1 x i64> %3, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %tmp2006.3 = bitcast <2 x i64> %4 to <16 x i8>
+  %5 = shufflevector <16 x i8> %tmp2006.3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+  %tmp2004.3 = bitcast <16 x i8> %5 to <4 x i32>
+  br i1 undef, label %bb2, label %bb1
+
+bb2:
+  %result = phi <4 x i32> [ undef, %entry ], [ %tmp2004.3, %bb1 ]
+  ret <4 x i32> %result
+}
author	Bob Wilson <bob.wilson@apple.com>	2010-09-17 22:59:05 +0000
committer	Bob Wilson <bob.wilson@apple.com>	2010-09-17 22:59:05 +0000
commit	75f0288b7dff1fcb24d9d61ea3e9547ba21382e4 (patch)
tree	5091aa136523197a9f73e5a2185e09850c683355
parent	e6be85e9ff6bd28c599421a120a8491257c13ebd (diff)