aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2013-01-20 08:35:56 +0000
committerNadav Rotem <nrotem@apple.com>2013-01-20 08:35:56 +0000
commit0c8607ba6a21578996a7532b9390afba13bd2087 (patch)
tree71f7605e9ff90372ea8159b6f7be62b3d1a03fe2
parentec98d2ce5ec51148e442c024b77a7483c81eee6c (diff)
Revert 172708.
The optimization handles esoteric cases but adds a lot of complexity both to the X86 backend and to other backends. This optimization disables an important canonicalization of chains of SEXT nodes and makes SEXT and ZEXT asymmetrical. Disabling the canonicalization of consecutive SEXT nodes into a single node disables other DAG optimizations that assume that there is only one SEXT node. The AVX mask optimization is one example. Additionally, this optimization does not update the cost model. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172968 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp18
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp4
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp28
-rwxr-xr-xtest/CodeGen/X86/avx-sext.ll56
-rwxr-xr-xtest/CodeGen/X86/avx2-conversions.ll12
5 files changed, 10 insertions, 108 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3e5a446e6e..a82410ae6a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4298,19 +4298,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (isa<ConstantSDNode>(N0))
return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
- // Folding (sext (sext x)) is obvious, but we do it only after the type
- // legalization phase. When the sequence is like {(T1->T2), (T2->T3)} and
- // T1 or T3 (or the both) are illegal types, the TypeLegalizer may not
- // give a good sequence for the (T1->T3) pair.
- // So we give a chance to target specific combiner to optimize T1->T2 and T2->T3
- // separately and may be fold them in a preceding of subsequent instruction.
- if (Level >= AfterLegalizeTypes) {
- // fold (sext (sext x)) -> (sext x)
- // fold (sext (aext x)) -> (sext x)
- if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
- return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
- N0.getOperand(0));
- }
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 91491bfe80..344d1447a8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2554,7 +2554,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
- if (OpOpcode == ISD::UNDEF)
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, VT);
break;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ec9f6751e1..c2b209d1b0 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17000,38 +17000,14 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
-
- if (!VT.isVector())
- return SDValue();
-
- SDValue In = N->getOperand(0);
- EVT InVT = In.getValueType();
- DebugLoc dl = N->getDebugLoc();
- unsigned ExtendedEltSize = VT.getVectorElementType().getSizeInBits();
-
- // Split SIGN_EXTEND operation to use vmovsx instruction when possible
- if (InVT == MVT::v8i8) {
- if (ExtendedEltSize > 16 && !Subtarget->hasInt256())
- In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, In);
- if (ExtendedEltSize > 32)
- In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i32, In);
- return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In);
- }
-
- if ((InVT == MVT::v4i8 || InVT == MVT::v4i16) &&
- ExtendedEltSize > 32 && !Subtarget->hasInt256()) {
- In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In);
- return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In);
- }
-
if (!DCI.isBeforeLegalizeOps())
return SDValue();
if (!Subtarget->hasFp256())
return SDValue();
- if (VT.is256BitVector()) {
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() && VT.getSizeInBits() == 256) {
SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
if (R.getNode())
return R;
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
index adee9bbe24..8d7d79db7d 100755
--- a/test/CodeGen/X86/avx-sext.ll
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -142,59 +142,3 @@ define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
%Y = sext <8 x i8> %X to <8 x i16>
ret <8 x i16>%Y
}
-; AVX: sext_1
-; AVX: vpmovsxbd
-; AVX: vpmovsxdq
-; AVX: vpmovsxdq
-; AVX: ret
-define void @sext_1(<4 x i8>* %inbuf, <4 x i64>* %outbuf) {
- %v0 = load <4 x i8>* %inbuf
- %r = sext <4 x i8> %v0 to <4 x i64>
- store <4 x i64> %r, <4 x i64>* %outbuf
- ret void
-}
-
-; AVX: sext_2
-; AVX: vpmovsxbd
-; AVX: ret
-define void @sext_2(<4 x i8>* %inbuf, <4 x i32>* %outbuf) {
- %v0 = load <4 x i8>* %inbuf
- %r = sext <4 x i8> %v0 to <4 x i32>
- store <4 x i32> %r, <4 x i32>* %outbuf
- ret void
-}
-
-; AVX: sext_3
-; AVX: vpmovsxwd
-; AVX: ret
-define void @sext_3(<4 x i16>* %inbuf, <4 x i32>* %outbuf) {
- %v0 = load <4 x i16>* %inbuf
- %r = sext <4 x i16> %v0 to <4 x i32>
- store <4 x i32> %r, <4 x i32>* %outbuf
- ret void
-}
-
-; AVX: sext_4
-; AVX: vpmovsxwd
-; AVX: vpmovsxdq
-; AVX: vpmovsxdq
-; AVX: ret
-define void @sext_4(<4 x i16>* %inbuf, <4 x i64>* %outbuf) {
- %v0 = load <4 x i16>* %inbuf
- %r = sext <4 x i16> %v0 to <4 x i64>
- store <4 x i64> %r, <4 x i64>* %outbuf
- ret void
-}
-
-; AVX: sext_6
-; AVX: vpmovsxbw
-; AVX: vpmovsxwd
-; AVX: vpmovsxwd
-; AVX: ret
-define void @sext_6(<8 x i8>* %inbuf, <8 x i32>* %outbuf) {
- %v0 = load <8 x i8>* %inbuf
- %r = sext <8 x i8> %v0 to <8 x i32>
- store <8 x i32> %r, <8 x i32>* %outbuf
- ret void
-}
-
diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll
index 17bd10a76e..3ce08dcc73 100755
--- a/test/CodeGen/X86/avx2-conversions.ll
+++ b/test/CodeGen/X86/avx2-conversions.ll
@@ -107,15 +107,3 @@ define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
%Y = sext <8 x i8> %X to <8 x i32>
ret <8 x i32>%Y
}
-
-; CHECK: load_sext_test6
-; CHECK: vpmovsxbd (%r{{[^,]*}}), %ymm{{.*}}
-; CHECK: vpmovsxdq
-; CHECK: vpmovsxdq
-; CHECK: ret
-define <8 x i64> @load_sext_test6(<8 x i8> *%ptr) {
- %X = load <8 x i8>* %ptr
- %Y = sext <8 x i8> %X to <8 x i64>
- ret <8 x i64>%Y
-}
-