11 files changed, 1332 insertions, 83 deletions
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index c655d3672c..1a08f4d608 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -49,25 +49,28 @@ namespace llvm {
 
       isVoid         =  13,   // This has no value
 
-      v8i8           =  14,   //  8 x i8
-      v4i16          =  15,   //  4 x i16
-      v2i32          =  16,   //  2 x i32
-      v1i64          =  17,   //  1 x i64
-      v16i8          =  18,   // 16 x i8
-      v8i16          =  19,   //  8 x i16
-      v3i32          =  20,   //  3 x i32
-      v4i32          =  21,   //  4 x i32
-      v2i64          =  22,   //  2 x i64
-
-      v2f32          =  23,   //  2 x f32
-      v3f32          =  24,   //  3 x f32
-      v4f32          =  25,   //  4 x f32
-      v2f64          =  26,   //  2 x f64
-
-      FIRST_VECTOR_VALUETYPE = v8i8,
+      v2i8           =  14,   //  2 x i8
+      v4i8           =  15,   //  4 x i8
+      v2i16          =  16,   //  2 x i16
+      v8i8           =  17,   //  8 x i8
+      v4i16          =  18,   //  4 x i16
+      v2i32          =  19,   //  2 x i32
+      v1i64          =  20,   //  1 x i64
+      v16i8          =  21,   // 16 x i8
+      v8i16          =  22,   //  8 x i16
+      v3i32          =  23,   //  3 x i32
+      v4i32          =  24,   //  4 x i32
+      v2i64          =  25,   //  2 x i64
+
+      v2f32          =  26,   //  2 x f32
+      v3f32          =  27,   //  3 x f32
+      v4f32          =  28,   //  4 x f32
+      v2f64          =  29,   //  2 x f64
+
+      FIRST_VECTOR_VALUETYPE = v2i8,
       LAST_VECTOR_VALUETYPE  = v2f64,
 
-      LAST_VALUETYPE =  27,   // This always remains at the end of the list.
+      LAST_VALUETYPE =  30,   // This always remains at the end of the list.
 
       // iPTRAny - An int value the size of the pointer of the current
       // target to any address space. This must only be used internal to
@@ -166,10 +169,13 @@ namespace llvm {
       default:
         break;
       case i8:
+        if (NumElements == 2)  return v2i8;
+        if (NumElements == 4)  return v4i8;
         if (NumElements == 8)  return v8i8;
         if (NumElements == 16) return v16i8;
         break;
       case i16:
+        if (NumElements == 2)  return v2i16;
         if (NumElements == 4)  return v4i16;
         if (NumElements == 8)  return v8i16;
         break;
@@ -233,7 +239,7 @@ namespace llvm {
       return isSimple() ?
              ((SimpleTy >= FIRST_INTEGER_VALUETYPE &&
                SimpleTy <= LAST_INTEGER_VALUETYPE) ||
-              (SimpleTy >= v8i8 && SimpleTy <= v2i64)) :
+              (SimpleTy >= v2i8 && SimpleTy <= v2i64)) :
              isExtendedInteger();
     }
 
@@ -312,8 +318,11 @@ namespace llvm {
       switch (V) {
       default:
         return getExtendedVectorElementType();
+      case v2i8 :
+      case v4i8 :
       case v8i8 :
       case v16i8: return i8;
+      case v2i16:
       case v4i16:
       case v8i16: return i16;
       case v2i32:
@@ -338,11 +347,14 @@ namespace llvm {
       case v16i8: return 16;
       case v8i8 :
       case v8i16: return 8;
+      case v4i8:
       case v4i16:
       case v4i32:
       case v4f32: return 4;
       case v3i32:
       case v3f32: return 3;
+      case v2i8:
+      case v2i16:
       case v2i32:
       case v2i64:
       case v2f32:
@@ -364,9 +376,12 @@ namespace llvm {
         return getExtendedSizeInBits();
       case i1  :  return 1;
       case i8  :  return 8;
-      case i16 :  return 16;
+      case i16 :
+      case v2i8:  return 16;
       case f32 :
-      case i32 :  return 32;
+      case i32 :
+      case v4i8:
+      case v2i16: return 32;
       case f64 :
       case i64 :
       case v8i8:
@@ -407,6 +422,25 @@ namespace llvm {
         return getIntegerVT(1 << Log2_32_Ceil(BitWidth));
     }
 
+    /// isPow2VectorType - Returns true if the vector's length is a power of 2.
+    bool isPow2VectorType() const {
+      unsigned NElts = getVectorNumElements();
+      return !(NElts & (NElts - 1));  // True iff at most one bit is set.
+    }
+
+    /// getPow2VectorType - Returns this type if its length is already a power
+    /// of 2; otherwise the same element type with the length rounded up to one.
+    MVT getPow2VectorType() const {
+      if (!isPow2VectorType()) {
+        unsigned NElts = getVectorNumElements();
+        unsigned Pow2NElts = 1 <<  Log2_32_Ceil(NElts);
+        return MVT::getVectorVT(getVectorElementType(), Pow2NElts);
+      }
+      else {
+        return *this;  // Length is already a power of 2.
+      }
+   }
+
     /// getIntegerVTBitMask - Return an integer with 1's every place there are
     /// bits in the specified integer value type. FIXME: Should return an apint.
     uint64_t getIntegerVTBitMask() const {
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 844b8db13e..53ed0bea9c 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -33,21 +33,24 @@ def f128   : ValueType<128, 10>;   // 128-bit floating point value
 def ppcf128: ValueType<128, 11>;   // PPC 128-bit floating point value
 def FlagVT : ValueType<0  , 12>;   // Condition code or machine flag
 def isVoid : ValueType<0  , 13>;   // Produces no value
-def v8i8   : ValueType<64 , 14>;   //  8 x i8  vector value
-def v4i16  : ValueType<64 , 15>;   //  4 x i16 vector value
-def v2i32  : ValueType<64 , 16>;   //  2 x i32 vector value
-def v1i64  : ValueType<64 , 17>;   //  1 x i64 vector value
-
-def v16i8  : ValueType<128, 18>;   // 16 x i8  vector value
-def v8i16  : ValueType<128, 19>;   //  8 x i16 vector value
-def v3i32  : ValueType<96 , 20>;   //  3 x i32 vector value
-def v4i32  : ValueType<128, 21>;   //  4 x i32 vector value
-def v2i64  : ValueType<128, 22>;   //  2 x i64 vector value
-
-def v2f32  : ValueType<64,  23>;   //  2 x f32 vector value
-def v3f32  : ValueType<96 , 24>;   //  3 x f32 vector value
-def v4f32  : ValueType<128, 25>;   //  4 x f32 vector value
-def v2f64  : ValueType<128, 26>;   //  2 x f64 vector value
+def v2i8   : ValueType<16 , 14>;   //  2 x i8  vector value
+def v4i8   : ValueType<32 , 15>;   //  4 x i8  vector value
+def v2i16  : ValueType<32 , 16>;   //  2 x i16 vector value
+def v8i8   : ValueType<64 , 17>;   //  8 x i8  vector value
+def v4i16  : ValueType<64 , 18>;   //  4 x i16 vector value
+def v2i32  : ValueType<64 , 19>;   //  2 x i32 vector value
+def v1i64  : ValueType<64 , 20>;   //  1 x i64 vector value
+
+def v16i8  : ValueType<128, 21>;   // 16 x i8  vector value
+def v8i16  : ValueType<128, 22>;   //  8 x i16 vector value
+def v3i32  : ValueType<96 , 23>;   //  3 x i32 vector value
+def v4i32  : ValueType<128, 24>;   //  4 x i32 vector value
+def v2i64  : ValueType<128, 25>;   //  2 x i64 vector value
+
+def v2f32  : ValueType<64,  26>;   //  2 x f32 vector value
+def v3f32  : ValueType<96 , 27>;   //  3 x f32 vector value
+def v4f32  : ValueType<128, 28>;   //  4 x f32 vector value
+def v2f64  : ValueType<128, 29>;   //  2 x f64 vector value
 
 // Pseudo valuetype mapped to the current pointer size to any address space.
 // Should only be used in TableGen.
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 71a1d893f1..2c655881ef 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -168,8 +168,7 @@ public:
     LegalizeAction getTypeAction(MVT VT) const {
       if (VT.isExtended()) {
         if (VT.isVector()) {
-          // First try vector widening
-          return Promote;
+          return VT.isPow2VectorType() ? Expand : Promote;
         }
         if (VT.isInteger())
           // First promote to a power-of-two size, then expand if necessary.
@@ -216,9 +215,15 @@ public:
     }
 
     if (VT.isVector()) {
-      unsigned NumElts = VT.getVectorNumElements();
-      MVT EltVT = VT.getVectorElementType();
-      return (NumElts == 1) ? EltVT : MVT::getVectorVT(EltVT, NumElts / 2);
+      MVT NVT = VT.getPow2VectorType();
+      if (NVT == VT) {
+        // Vector length is a power of 2 - split to half the size.
+        unsigned NumElts = VT.getVectorNumElements();
+        MVT EltVT = VT.getVectorElementType();
+        return (NumElts == 1) ? EltVT : MVT::getVectorVT(EltVT, NumElts / 2);
+      }
+      // Promote to a power of two size, avoiding multi-step promotion.
+      return getTypeAction(NVT) == Promote ? getTypeToTransformTo(NVT) : NVT;
     } else if (VT.isInteger()) {
       MVT NVT = VT.getRoundIntegerType();
       if (NVT == VT)
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 18ba9125d1..4a2af06966 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -491,9 +491,11 @@ void SelectionDAGLegalize::HandleOp(SDValue Op) {
   MVT VT = Op.getValueType();
   // If the type legalizer was run then we should never see any illegal result
   // types here except for target constants (the type legalizer does not touch
-  // those).
+  // those) or for build vector used as a mask for a vector shuffle.
+  // FIXME: We can remove the BUILD_VECTOR case when we fix PR2957.
   assert((TypesNeedLegalizing || getTypeAction(VT) == Legal ||
-          Op.getOpcode() == ISD::TargetConstant) &&
+          Op.getOpcode() == ISD::TargetConstant ||
+          Op.getOpcode() == ISD::BUILD_VECTOR) &&
          "Illegal type introduced after type legalization?");
   switch (getTypeAction(VT)) {
   default: assert(0 && "Bad type action!");
@@ -3299,7 +3301,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
                          0);
         break;
       }
-      
+
       // Check to see if we have a libcall for this operator.
       RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
       bool isSigned = false;
@@ -7646,7 +7648,10 @@ void SelectionDAGLegalize::SplitVectorOp(SDValue Op, SDValue &Lo,
   case ISD::XOR:
   case ISD::UREM:
   case ISD::SREM:
-  case ISD::FREM: {
+  case ISD::FREM:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL: {
     SDValue LL, LH, RL, RH;
     SplitVectorOp(Node->getOperand(0), LL, LH);
     SplitVectorOp(Node->getOperand(1), RL, RH);
@@ -8067,30 +8072,33 @@ SDValue SelectionDAGLegalize::WidenVectorOp(SDValue Op, MVT WidenVT) {
     SDValue Tmp1 = Node->getOperand(0);
     // Converts between two different types so we need to determine
     // the correct widen type for the input operand.
-    MVT TVT = Tmp1.getValueType();
-    assert(TVT.isVector() && "can not widen non vector type");
-    MVT TEVT = TVT.getVectorElementType();
-    assert(WidenVT.getSizeInBits() % EVT.getSizeInBits() == 0 &&
-         "can not widen bit bit convert that are not multiple of element type");
-    MVT TWidenVT =  MVT::getVectorVT(TEVT,
-                                   WidenVT.getSizeInBits()/EVT.getSizeInBits());
-    Tmp1 = WidenVectorOp(Tmp1, TWidenVT);
-    assert(Tmp1.getValueType().getSizeInBits() == WidenVT.getSizeInBits());
-    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1);
+    MVT InVT = Tmp1.getValueType();
+    unsigned WidenSize = WidenVT.getSizeInBits();    
+    if (InVT.isVector()) {
+      MVT InEltVT = InVT.getVectorElementType();
+      unsigned InEltSize = InEltVT.getSizeInBits();
+      assert(WidenSize % InEltSize == 0 &&
+             "can not widen bit convert that are not multiple of element type");
+      MVT NewInWidenVT = MVT::getVectorVT(InEltVT, WidenSize / InEltSize);
+      Tmp1 = WidenVectorOp(Tmp1, NewInWidenVT);
+      assert(Tmp1.getValueType().getSizeInBits() == WidenVT.getSizeInBits());
+      Result = DAG.getNode(ISD::BIT_CONVERT, WidenVT, Tmp1);
+    } else {
+      // If the result size is a multiple of the input size, widen the input
+      // and then convert.
+      unsigned InSize = InVT.getSizeInBits();
+      assert(WidenSize % InSize == 0 &&
+             "can not widen bit convert that are not multiple of element type");
+      unsigned NewNumElts = WidenSize / InSize;
+      SmallVector<SDValue, 16> Ops(NewNumElts);
+      SDValue UndefVal = DAG.getNode(ISD::UNDEF, InVT);
+      Ops[0] = Tmp1;
+      for (unsigned i = 1; i < NewNumElts; ++i)
+        Ops[i] = UndefVal;
 
-    TargetLowering::LegalizeAction action =
-      TLI.getOperationAction(Node->getOpcode(), WidenVT);
-    switch (action)  {
-    default: assert(0 && "action not supported");
-    case TargetLowering::Legal:
-        break;
-    case TargetLowering::Promote:
-        // We defer the promotion to when we legalize the op
-      break;
-    case TargetLowering::Expand:
-      // Expand the operation into a bunch of nasty scalar code.
-      Result = LegalizeOp(UnrollVectorOp(Result));
-      break;
+      MVT NewInVT = MVT::getVectorVT(InVT, NewNumElts);
+      Result = DAG.getNode(ISD::BUILD_VECTOR, NewInVT, &Ops[0], NewNumElts);
+      Result = DAG.getNode(ISD::BIT_CONVERT, WidenVT, Result);
     }
     break;
   }
@@ -8098,7 +8106,8 @@ SDValue SelectionDAGLegalize::WidenVectorOp(SDValue Op, MVT WidenVT) {
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
   case ISD::FP_TO_SINT:
-  case ISD::FP_TO_UINT: {
+  case ISD::FP_TO_UINT:
+  case ISD::FP_ROUND: {
     SDValue Tmp1 = Node->getOperand(0);
     // Converts between two different types so we need to determine
     // the correct widen type for the input operand.
@@ -8118,7 +8127,6 @@ SDValue SelectionDAGLegalize::WidenVectorOp(SDValue Op, MVT WidenVT) {
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND:
-  case ISD::FP_ROUND:
   case ISD::SIGN_EXTEND_INREG:
   case ISD::FABS:
   case ISD::FNEG:
@@ -8129,7 +8137,7 @@ SDValue SelectionDAGLegalize::WidenVectorOp(SDValue Op, MVT WidenVT) {
   case ISD::CTTZ:
   case ISD::CTLZ: {
     // Unary op widening
-    SDValue Tmp1;    
+    SDValue Tmp1;
     Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
     assert(Tmp1.getValueType() == WidenVT);
     Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1);
@@ -8306,7 +8314,7 @@ SDValue SelectionDAGLegalize::WidenVectorOp(SDValue Op, MVT WidenVT) {
     MVT TmpWidenVT =  MVT::getVectorVT(TmpEVT, NewNumElts);
     Tmp1 = WidenVectorOp(Tmp1, TmpWidenVT);
     SDValue Tmp2 = WidenVectorOp(Node->getOperand(1), TmpWidenVT);
-    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1, Tmp2, 
+    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1, Tmp2,
                          Node->getOperand(2));
     break;
   }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 8abf1448db..5f23677880 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -217,7 +217,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
     // Convert the element to an integer and promote it by hand.
     return DAG.getNode(ISD::ANY_EXTEND, NOutVT,
                        BitConvertToInteger(GetScalarizedVector(InOp)));
-  case SplitVector:
+  case SplitVector: {
     // For example, i32 = BIT_CONVERT v2i16 on alpha.  Convert the split
     // pieces of the input into integers and reassemble in the final type.
     SDValue Lo, Hi;
@@ -233,9 +233,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
                        JoinIntegers(Lo, Hi));
     return DAG.getNode(ISD::BIT_CONVERT, NOutVT, InOp);
   }
+  case WidenVector:
+    if (OutVT.bitsEq(NInVT))
+      // The input is widened to the same size.  Convert to the widened value.
+      return DAG.getNode(ISD::BIT_CONVERT, OutVT, GetWidenedVector(InOp));
+  }
 
   // Otherwise, lower the bit-convert to a store/load from the stack.
-
   // Create the stack frame object.  Make sure it is aligned for both
   // the source and destination types.
   SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 8454dda303..704c537bb4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -108,6 +108,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
         Mapped |= 32;
       if (SplitVectors.find(Res) != SplitVectors.end())
         Mapped |= 64;
+      if (WidenedVectors.find(Res) != WidenedVectors.end())
+        Mapped |= 128;
 
       if (I->getNodeId() != Processed) {
         if (Mapped != 0) {
@@ -115,7 +117,10 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
           Failed = true;
         }
       } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
-        if (Mapped > 1) {
+        // FIXME: Because of PR2957, the build vector can be placed on this
+        // list but if the associated vector shuffle is split, the build vector
+        // can also be split so we allow this to go through for now.
+        if (Mapped > 1 && Res.getOpcode() != ISD::BUILD_VECTOR) {
           cerr << "Value with legal type was transformed!";
           Failed = true;
         }
@@ -144,6 +149,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
           cerr << " ExpandedFloats";
         if (Mapped & 64)
           cerr << " SplitVectors";
+        if (Mapped & 128)
+          cerr << " WidenedVectors";
         cerr << "\n";
         abort();
       }
@@ -241,6 +248,10 @@ bool DAGTypeLegalizer::run() {
         SplitVectorResult(N, i);
         Changed = true;
         goto NodeDone;
+      case WidenVector:
+        WidenVectorResult(N, i);
+        Changed = true;
+        goto NodeDone;
       }
     }
 
@@ -255,6 +266,13 @@ ScanOperands:
       if (IgnoreNodeResults(N->getOperand(i).getNode()))
         continue;
 
+      if (N->getOpcode() == ISD::VECTOR_SHUFFLE && i == 2) {
+        // The shuffle mask doesn't need to be a legal vector type.
+        // FIXME: We can remove this once we fix PR2957.
+        SetIgnoredNodeResult(N->getOperand(2).getNode());
+        continue;
+      }
+
       MVT OpVT = N->getOperand(i).getValueType();
       switch (getTypeAction(OpVT)) {
       default:
@@ -288,6 +306,10 @@ ScanOperands:
         NeedsReanalyzing = SplitVectorOperand(N, i);
         Changed = true;
         break;
+      case WidenVector:
+        NeedsReanalyzing = WidenVectorOperand(N, i);
+        Changed = true;
+        break;
       }
       break;
     }
@@ -791,6 +813,18 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
   Entry.second = Hi;
 }
 
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+  AnalyzeNewValue(Result);
+  // Map Op to its widened replacement; Op must not already have an entry.
+  SDValue &OpEntry = WidenedVectors[Op];
+  assert(OpEntry.getNode() == 0 && "Node already widened!");
+  OpEntry = Result;
+}
+
+// Add N to IgnoredNodesResultsSet so IgnoreNodeResults() is true for it.
+void DAGTypeLegalizer::SetIgnoredNodeResult(SDNode* N) {
+  IgnoredNodesResultsSet.insert(N);
+}
 
 //===----------------------------------------------------------------------===//
 // Utilities.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 9a42c5656e..2d4cde813f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 
@@ -63,7 +64,8 @@ private:
     SoftenFloat,     // Convert this float type to a same size integer type.
     ExpandFloat,     // Split this float type into two of half the size.
     ScalarizeVector, // Replace this one-element vector with its element type.
-    SplitVector      // This vector type should be split into smaller vectors.
+    SplitVector,     // This vector type should be split into smaller vectors.
+    WidenVector      // This vector type should be widened into larger vectors.
   };
 
   /// ValueTypeActions - This is a bitvector that contains two bits for each
@@ -88,11 +90,8 @@ private:
       //   2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
       if (!VT.isVector())
         return PromoteInteger;
-      else if (VT.getVectorNumElements() == 1)
-        return ScalarizeVector;
       else
-        // TODO: move widen code to LegalizeTypes.
-        return SplitVector;
+        return WidenVector;
     case TargetLowering::Expand:
       // Expand can mean
       // 1) split scalar in half, 2) convert a float to an integer,
@@ -120,9 +119,13 @@ private:
 
   /// IgnoreNodeResults - Pretend all of this node's results are legal.
   bool IgnoreNodeResults(SDNode *N) const {
-    return N->getOpcode() == ISD::TargetConstant;
+    return N->getOpcode() == ISD::TargetConstant ||
+           IgnoredNodesResultsSet.count(N);
   }
 
+  /// IgnoredNodesResultsSet - Nodes whose results don't need to be legal.
+  DenseSet<SDNode*> IgnoredNodesResultsSet;
+
   /// PromotedIntegers - For integer nodes that are below legal width, this map
   /// indicates what promoted value to use.
   DenseMap<SDValue, SDValue> PromotedIntegers;
@@ -147,6 +150,10 @@ private:
   /// which operands are the expanded version of the input.
   DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
 
+  /// WidenedVectors - For vector nodes that need to be widened, indicates
+  /// the widened value to use.
+  DenseMap<SDValue, SDValue> WidenedVectors;
+
   /// ReplacedValues - For values that have been replaced with another,
   /// indicates the replacement value to use.
   DenseMap<SDValue, SDValue> ReplacedValues;
@@ -201,6 +208,8 @@ private:
 
   SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index);
 
+  void SetIgnoredNodeResult(SDNode* N);
+
   //===--------------------------------------------------------------------===//
   // Integer Promotion Support: LegalizeIntegerTypes.cpp
   //===--------------------------------------------------------------------===//
@@ -563,6 +572,91 @@ private:
   SDValue SplitVecOp_VECTOR_SHUFFLE(SDNode *N, unsigned OpNo);
 
   //===--------------------------------------------------------------------===//
+  // Vector Widening Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+  SDValue GetWidenedVector(SDValue Op) {
+    SDValue &WidenedOp = WidenedVectors[Op];  // Set by SetWidenedVector.
+    RemapValue(WidenedOp);
+    assert(WidenedOp.getNode() && "Operand wasn't widened?");
+    return WidenedOp;
+  }
+  void SetWidenedVector(SDValue Op, SDValue Result);
+
+  // Widen Vector Result Promotion.
+  void WidenVectorResult(SDNode *N, unsigned ResNo);
+  SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+  SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+  SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+  SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+  SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+  SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_LOAD(SDNode* N);
+  SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+  SDValue WidenVecRes_SELECT(SDNode* N);
+  SDValue WidenVecRes_SELECT_CC(SDNode* N);
+  SDValue WidenVecRes_UNDEF(SDNode *N);
+  SDValue WidenVecRes_VECTOR_SHUFFLE(SDNode *N);
+  SDValue WidenVecRes_VSETCC(SDNode* N);
+
+  SDValue WidenVecRes_Binary(SDNode *N);
+  SDValue WidenVecRes_Convert(SDNode *N);
+  SDValue WidenVecRes_Shift(SDNode *N);
+  SDValue WidenVecRes_Unary(SDNode *N);
+
+  // Widen Vector Operand.
+  bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+  SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue WidenVecOp_STORE(SDNode* N);
+
+  SDValue WidenVecOp_Convert(SDNode *N);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GenWidenVectorLoads - Helper function to generate a set of loads that
+  /// load a vector into a wider result type.
+  /// It takes
+  ///   LdChain: list of chains for the loads we have generated.
+  ///   Chain:   incoming chain for the ld vector.
+  ///   BasePtr: base pointer to load from.
+  ///   SV:         memory disambiguation source value.
+  ///   SVOffset:   memory disambiguation offset.
+  ///   Alignment:  alignment of the memory.
+  ///   isVolatile: whether the load is volatile.
+  ///   LdWidth:    width of memory that we want to load.
+  ///   ResType:    the wider result type for the resulting vector.
+  SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
+                              SDValue BasePtr, const Value *SV,
+                              int SVOffset, unsigned Alignment,
+                              bool isVolatile, unsigned LdWidth,
+                              MVT ResType);
+
+  /// GenWidenVectorStores - Helper function to generate a set of stores
+  /// that store a widened vector into non-widened memory.
+  /// It takes
+  ///   StChain: list of chains for the stores we have generated.
+  ///   Chain:   incoming chain for the vector store.
+  ///   BasePtr: base pointer to store to.
+  ///   SV:      memory disambiguation source value.
+  ///   SVOffset:   memory disambiguation offset.
+  ///   Alignment:  alignment of the memory.
+  ///   isVolatile: whether the store is volatile.
+  ///   ValOp:   value to store.
+  ///   StWidth: width of memory that we want to store.
+  void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain,
+                            SDValue BasePtr, const Value *SV,
+                            int SVOffset, unsigned Alignment,
+                            bool isVolatile, SDValue ValOp,
+                            unsigned StWidth);
+
+  /// Modifies a vector input (widens or narrows it) to a vector of type
+  /// WidenVT.  The input vector must have the same element type as WidenVT.
+  SDValue ModifyToType(SDValue InOp, MVT WidenVT);
+
+
+  //===--------------------------------------------------------------------===//
   // Generic Splitting: LegalizeTypesGeneric.cpp
   //===--------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5f15fa506d..5c34db489b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -371,7 +371,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
   case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
   case ISD::SCALAR_TO_VECTOR:  SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
-  case ISD::LOAD:           SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break;
+  case ISD::LOAD:              SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break;
   case ISD::VECTOR_SHUFFLE:    SplitVecRes_VECTOR_SHUFFLE(N, Lo, Hi); break;
   case ISD::VSETCC:            SplitVecRes_VSETCC(N, Lo, Hi); break;
 
@@ -1062,3 +1062,1034 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_SHUFFLE(SDNode *N, unsigned OpNo) {
   assert(false && "Failed to find an appropriate mask type!");
   return SDValue(N, 0);
 }
+
+
+//===----------------------------------------------------------------------===//
+//  Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG);
+        cerr << "\n");
+  SDValue Res = SDValue();
+  // Dispatch on the opcode to the matching WidenVecRes_* helper.
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "WidenVectorResult #" << ResNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to widen the result of this operator!");
+    abort();
+
+  case ISD::BIT_CONVERT:       Res = WidenVecRes_BIT_CONVERT(N); break;
+  case ISD::BUILD_VECTOR:      Res = WidenVecRes_BUILD_VECTOR(N); break;
+  case ISD::CONCAT_VECTORS:    Res = WidenVecRes_CONCAT_VECTORS(N); break;
+  case ISD::CONVERT_RNDSAT:    Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+  case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
+  case ISD::SCALAR_TO_VECTOR:  Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+  case ISD::SELECT:            Res = WidenVecRes_SELECT(N); break;
+  case ISD::SELECT_CC:         Res = WidenVecRes_SELECT_CC(N); break;
+  case ISD::UNDEF:             Res = WidenVecRes_UNDEF(N); break;
+  case ISD::VECTOR_SHUFFLE:    Res = WidenVecRes_VECTOR_SHUFFLE(N); break;
+  case ISD::VSETCC:            Res = WidenVecRes_VSETCC(N); break;
+
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::FADD:
+  case ISD::FCOPYSIGN:
+  case ISD::FDIV:
+  case ISD::FMUL:
+  case ISD::FPOW:
+  case ISD::FPOWI:  // NOTE(review): is operand 1 a vector here? Confirm.
+  case ISD::FREM:
+  case ISD::FSUB:
+  case ISD::MUL:
+  case ISD::MULHS:
+  case ISD::MULHU:
+  case ISD::OR:
+  case ISD::SDIV:
+  case ISD::SREM:
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::SUB:
+  case ISD::XOR:               Res = WidenVecRes_Binary(N); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:               Res = WidenVecRes_Shift(N); break;
+
+  case ISD::ANY_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SIGN_EXTEND:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::ZERO_EXTEND:
+  case ISD::UINT_TO_FP:        Res = WidenVecRes_Convert(N); break;
+
+  case ISD::BSWAP:  // Single-operand node, so it widens as a unary op.
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::FABS:
+  case ISD::FCOS:
+  case ISD::FNEG:
+  case ISD::FSIN:
+  case ISD::FSQRT:             Res = WidenVecRes_Unary(N); break;
+  }
+
+  // If Res is null, the sub-method took care of registering the result.
+  if (Res.getNode())
+    SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+  // Binary op widening: apply the same opcode to the two widened operands.
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), WidenVT, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  // The result is widened to WidenVT, which has WidenNumElts elements.
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  // InWidenVT keeps the input's element type but has WidenNumElts elements.
+  MVT InVT = InOp.getValueType();
+  MVT InEltVT = InVT.getVectorElementType();
+  MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts);
+
+  unsigned Opcode = N->getOpcode();
+  unsigned InVTNumElts = InVT.getVectorNumElements();
+  // A widened input with a matching element count is converted directly.
+  if (getTypeAction(InVT) == WidenVector) {
+    InOp = GetWidenedVector(N->getOperand(0));
+    InVT = InOp.getValueType();
+    InVTNumElts = InVT.getVectorNumElements();
+    if (InVTNumElts == WidenNumElts)
+      return DAG.getNode(Opcode, WidenVT, InOp);
+  }
+
+  if (TLI.isTypeLegal(InWidenVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type that might lead to repeatedly splitting the input
+    // and then widening it. To avoid this, we widen the input only if
+    // it results in a legal type.
+    if (WidenNumElts % InVTNumElts == 0) {
+      // Pad the input with undef vectors up to InWidenVT, then convert it.
+      unsigned NumConcat = WidenNumElts/InVTNumElts;
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      Ops[0] = InOp;
+      SDValue UndefVal = DAG.getNode(ISD::UNDEF, InVT);
+      for (unsigned i = 1; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      return DAG.getNode(Opcode, WidenVT,
+                         DAG.getNode(ISD::CONCAT_VECTORS, InWidenVT,
+                         &Ops[0], NumConcat));
+    }
+
+    if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the subvector at index 0 and convert the shortened input.
+      return DAG.getNode(Opcode, WidenVT,
+                         DAG.getNode(ISD::EXTRACT_SUBVECTOR, InWidenVT, InOp,
+                                     DAG.getIntPtrConstant(0)));
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  MVT EltVT = WidenVT.getVectorElementType();
+  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+  unsigned i;
+  for (i=0; i < MinElts; ++i)
+    Ops[i] = DAG.getNode(Opcode, EltVT,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, InEltVT, InOp,
+                                     DAG.getIntPtrConstant(i)));
+  // Any remaining elements of the widened result are undef.
+  SDValue UndefVal = DAG.getNode(ISD::UNDEF, EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  SDValue ShOp = N->getOperand(1);
+
+  MVT ShVT = ShOp.getValueType();
+  if (getTypeAction(ShVT) == WidenVector) {
+    ShOp = GetWidenedVector(ShOp);
+    ShVT = ShOp.getValueType();
+  }
+  MVT ShWidenVT = MVT::getVectorVT(ShVT.getVectorElementType(),
+                                   WidenVT.getVectorNumElements());
+  if (ShVT != ShWidenVT) 
+    ShOp = ModifyToType(ShOp, ShWidenVT);
+