From 3ef9dfa6858e25015c3e36b2f1a0ba5ebdea80d2 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 25 Oct 2012 21:03:48 +0000 Subject: LoopVectorize: Teach the cost model to query scalar costs as scalar types and not vectors of 1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166715 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 102 +++++++++++++++++------------ 1 file changed, 61 insertions(+), 41 deletions(-) (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 423c7a4911..e47baf8908 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -324,6 +324,11 @@ private: /// width. Vector width of one means scalar. unsigned getInstructionCost(Instruction *I, unsigned VF); + /// A helper function for converting Scalar types to vector types. + /// If the incoming type is void, we return void. If the VF is 1, we return + /// the scalar type. + static Type* ToVectorTy(Type *Scalar, unsigned VF); + /// The loop that we evaluate. Loop *TheLoop; /// Scev analysis. @@ -1478,8 +1483,16 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { assert(VTTI && "Invalid vector target transformation info"); + + Type *RetTy = I->getType(); + Type *VectorTy = ToVectorTy(RetTy, VF); + + // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { case Instruction::GetElementPtr: + // We mark this instruction as zero-cost because scalar GEPs are usually + // lowered to the intruction addressing mode. At the moment we don't + // generate vector geps. return 0; case Instruction::Br: { return VTTI->getInstrCost(I->getOpcode()); @@ -1504,74 +1517,76 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - Type *VTy = VectorType::get(I->getType(), VF); - return VTTI->getInstrCost(I->getOpcode(), VTy); + return VTTI->getInstrCost(I->getOpcode(), VectorTy); } case Instruction::Select: { SelectInst *SI = cast(I); - Type *VTy = VectorType::get(I->getType(), VF); const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); if (ScalarCond) CondTy = VectorType::get(CondTy, VF); - return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy); + return VTTI->getInstrCost(I->getOpcode(), VectorTy, CondTy); } case Instruction::ICmp: case Instruction::FCmp: { - Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF); - return VTTI->getInstrCost(I->getOpcode(), VTy); + Type *ValTy = I->getOperand(0)->getType(); + VectorTy = ToVectorTy(ValTy, VF); + return VTTI->getInstrCost(I->getOpcode(), VectorTy); } case Instruction::Store: { StoreInst *SI = cast(I); - Type *VTy = VectorType::get(SI->getValueOperand()->getType(), VF); + Type *ValTy = SI->getValueOperand()->getType(); + VectorTy = ToVectorTy(ValTy, VF); + + if (VF == 1) + return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, + SI->getAlignment(), SI->getPointerAddressSpace()); // Scalarized stores. if (!Legal->isConsecutiveGep(SI->getPointerOperand())) { unsigned Cost = 0; - if (VF != 1) { - unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, - VTy); - // The cost of extracting from the value vector and pointer vector. - Cost += VF * (ExtCost * 2); - } + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, + ValTy); + // The cost of extracting from the value vector. + Cost += VF * (ExtCost); // The cost of the scalar stores. Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), - VTy->getScalarType(), + ValTy->getScalarType(), SI->getAlignment(), SI->getPointerAddressSpace()); return Cost; } // Wide stores. - return VTTI->getMemoryOpCost(I->getOpcode(), VTy, SI->getAlignment(), + return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(), SI->getPointerAddressSpace()); } case Instruction::Load: { LoadInst *LI = cast(I); - Type *VTy = VectorType::get(I->getType(), VF); + + if (VF == 1) + return VTTI->getMemoryOpCost(I->getOpcode(), RetTy, + LI->getAlignment(), + LI->getPointerAddressSpace()); // Scalarized loads. if (!Legal->isConsecutiveGep(LI->getPointerOperand())) { unsigned Cost = 0; - if (VF != 1) { - unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy); - unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractValue, VTy); - - // The cost of inserting the loaded value into the result vector, and - // extracting from a vector of pointers. - Cost += VF * (InCost + ExCost); - } + unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy); + // The cost of inserting the loaded value into the result vector. + Cost += VF * (InCost); // The cost of the scalar stores. - Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(), + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), + RetTy->getScalarType(), LI->getAlignment(), LI->getPointerAddressSpace()); return Cost; } // Wide loads. - return VTTI->getMemoryOpCost(I->getOpcode(), VTy, LI->getAlignment(), + return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(), LI->getPointerAddressSpace()); } case Instruction::ZExt: @@ -1586,35 +1601,40 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - Type *SrcTy = VectorType::get(I->getOperand(0)->getType(), VF); - Type *DstTy = VectorType::get(I->getType(), VF); - return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy); + Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF); + return VTTI->getInstrCost(I->getOpcode(), VectorTy, SrcVecTy); } default: { // We are scalarizing the instruction. Return the cost of the scalar // instruction, plus the cost of insert and extract into vector // elements, times the vector width. unsigned Cost = 0; - Type *Ty = I->getType(); - if (!Ty->isVoidTy()) { - Type *VTy = VectorType::get(Ty, VF); - unsigned InsCost = VTTI->getInstrCost(Instruction::InsertElement, VTy); - unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy); - Cost += VF * (InsCost + ExtCost); - } + bool IsVoid = RetTy->isVoidTy(); - /// We don't have any information on the scalar instruction, but maybe - /// the target has. - /// TODO: This may be a target-specific intrinsic. - /// Need to add API for that. - Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty); + unsigned InsCost = (IsVoid ? 0 : + VTTI->getInstrCost(Instruction::InsertElement, + VectorTy)); + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, + VectorTy); + + // The cost of inserting the results plus extracting each one of the + // operands. + Cost += VF * (InsCost + ExtCost * I->getNumOperands()); + + // The cost of executing VF copies of the scalar instruction. + Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy); return Cost; } }// end of switch. } +Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) { + if (Scalar->isVoidTy() || VF == 1) + return Scalar; + return VectorType::get(Scalar, VF); +} } // namespace -- cgit v1.2.3-70-g09d2 From a5a3a61c5fdcee972791d4e08441ba6edf131b88 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 26 Oct 2012 23:49:28 +0000 Subject: Refactor the VectorTargetTransformInfo interface. Add getCostXXX calls for different families of opcodes, such as casts, arithmetic, cmp, etc. Port the LoopVectorizer to the new API. The LoopVectorizer now finds instructions which will remain uniform after vectorization. It uses this information when calculating the cost of these instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166836 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 7 ++ include/llvm/Target/TargetTransformImpl.h | 19 ++- include/llvm/TargetTransformInfo.h | 32 ++++- lib/Target/TargetTransformImpl.cpp | 152 +++++++++++++++++++----- lib/Transforms/Vectorize/LoopVectorize.cpp | 61 ++++++++-- test/Transforms/LoopVectorize/X86/cost-model.ll | 2 +- 6 files changed, 234 insertions(+), 39 deletions(-) (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 19eb941635..9d0aeaa356 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -411,6 +411,13 @@ public: getOperationAction(Op, VT) == Custom); } + /// isOperationExpand - Return true if the specified operation is illegal on + /// this target or unlikely to be made legal with custom lowering. This is + /// used to help guide high-level lowering decisions. + bool isOperationExpand(unsigned Op, EVT VT) const { + return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); + } + /// isOperationLegal - Return true if the specified operation is legal on this /// target. bool isOperationLegal(unsigned Op, EVT VT) const { diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h index 133be87194..fd4b737afd 100644 --- a/include/llvm/Target/TargetTransformImpl.h +++ b/include/llvm/Target/TargetTransformImpl.h @@ -56,15 +56,32 @@ private: std::pair getTypeLegalizationCost(LLVMContext &C, EVT Ty) const; + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + public: explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {} - + virtual ~VectorTargetTransformImpl() {} virtual unsigned getInstrCost(unsigned Opcode, Type *Ty1, Type *Ty2) const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + virtual unsigned getBroadcastCost(Type *Tp) const; + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + + virtual unsigned getCFInstrCost(unsigned Opcode) const; + + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; + + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const; diff --git a/include/llvm/TargetTransformInfo.h b/include/llvm/TargetTransformInfo.h index 71c78ec52e..96761594fb 100644 --- a/include/llvm/TargetTransformInfo.h +++ b/include/llvm/TargetTransformInfo.h @@ -143,13 +143,43 @@ public: return 1; } + /// Returns the expected cost of arithmetic ops, such as mul, xor, fsub, etc. + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { + return 1; + } + /// Returns the cost of a vector broadcast of a scalar at place zero to a /// vector of type 'Tp'. virtual unsigned getBroadcastCost(Type *Tp) const { return 1; } - /// Returns the cost of Load and Store instructions. + /// Returns the expected cost of cast instructions, such as bitcast, trunc, + /// zext, etc. + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + return 1; + } + + /// Returns the expected cost of control-flow related instrutctions such as + /// Phi, Ret, Br. + virtual unsigned getCFInstrCost(unsigned Opcode) const { + return 1; + } + + /// Returns the expected cost of compare and select instructions. + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy = 0) const { + return 1; + } + + /// Returns the expected cost of vector Insert and Extract. + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index = 0) const { + return 1; + } + + /// Returns the cost of Load and Store instructions. virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp index 40184ed78d..d3ab105988 100644 --- a/lib/Target/TargetTransformImpl.cpp +++ b/lib/Target/TargetTransformImpl.cpp @@ -126,7 +126,7 @@ static int InstructionOpcodeToISD(unsigned Opcode) { std::pair VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, - EVT Ty) const { + EVT Ty) const { unsigned Cost = 1; // We keep legalizing the type until we find a legal kind. We assume that // the only operation that costs anything is the split. After splitting @@ -135,7 +135,7 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty); if (LK.first == TargetLowering::TypeLegal) - return std::make_pair(Cost, LK.second); + return std::make_pair(Cost, Ty); if (LK.first == TargetLowering::TypeSplitVector) Cost *= 2; @@ -146,44 +146,144 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, } unsigned -VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1, - Type *Ty2) const { +VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty, + bool Insert, + bool Extract) const { + assert (Ty->isVectorTy() && "Can only scalarize vectors"); + unsigned Cost = 0; + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + if (Insert) + Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i); + if (Extract) + Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + + return Cost; +} + +unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode, + Type *Ty) const { // Check if any of the operands are vector operands. int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + std::pair LT = + getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty)); + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. Multiply + // by the type-legalization overhead. + return LT.first * 1; + } + + // Else, assume that we need to scalarize this op. + if (Ty->isVectorTy()) { + unsigned Num = Ty->getVectorNumElements(); + unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); + // return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(Ty, true, true) + Num * Cost; + } + + // We don't know anything about this scalar instruction. + return 1; +} + +unsigned VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const { + return 1; +} + +unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + assert(Src->isVectorTy() == Dst->isVectorTy() && "Invalid input types"); + int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); - // If we don't have any information about this instruction assume it costs 1. - if (ISD == 0) - return 1; + std::pair SrcLT = + getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src)); + std::pair DstLT = + getTypeLegalizationCost(Dst->getContext(), TLI->getValueType(Dst)); + + // If the cast is between same-sized registers, then the check is simple. + if (SrcLT.first == DstLT.first && + SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { + // Just check the op cost: + if (!TLI->isOperationExpand(ISD, DstLT.second)) { + // The operation is legal. Assume it costs 1. Multiply + // by the type-legalization overhead. + return SrcLT.first * 1; + } + } + + // Otherwise, assume that the cast is scalarized. + if (Dst->isVectorTy()) { + unsigned Num = Dst->getVectorNumElements(); + unsigned Cost = getCastInstrCost(Opcode, Src->getScalarType(), + Dst->getScalarType()); + // return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(Dst, true, true) + Num * Cost; + } + + // Unknown scalar opcode. + return 1; +} + +unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const { + return 1; +} + +unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode, + Type *ValTy, + Type *CondTy) const { + int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + // Selects on vectors are actually vector selects. if (ISD == ISD::SELECT) { - assert(Ty2 && "Ty2 must hold the condition type"); - if (Ty2->isVectorTy()) - ISD = ISD::VSELECT; + assert(CondTy && "CondTy must exist"); + if (CondTy->isVectorTy()) + ISD = ISD::VSELECT; } - assert(Ty1 && "We need to have at least one type"); + std::pair LT = + getTypeLegalizationCost(ValTy->getContext(), TLI->getValueType(ValTy)); - // From this stage we look at the legalized type. - std::pair LT = - getTypeLegalizationCost(Ty1->getContext(), TLI->getValueType(Ty1)); - - if (TLI->isOperationLegalOrCustom(ISD, LT.second)) { + if (!TLI->isOperationExpand(ISD, LT.second)) { // The operation is legal. Assume it costs 1. Multiply // by the type-legalization overhead. return LT.first * 1; } - unsigned NumElem = - (LT.second.isVector() ? LT.second.getVectorNumElements() : 1); + // Otherwise, assume that the cast is scalarized. + if (ValTy->isVectorTy()) { + unsigned Num = ValTy->getVectorNumElements(); + if (CondTy) + CondTy = CondTy->getScalarType(); + unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(), + CondTy); + + // return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(ValTy, true, false) + Num * Cost; + } - // We will probably scalarize this instruction. Assume that the cost is the - // number of the vector elements. - return LT.first * NumElem * 1; + // Unknown scalar opcode. + return 1; +} + +/// Returns the expected cost of Vector Insert and Extract. +unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode, + Type *Val, + unsigned Index) const { + return 1; } unsigned -VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const { +VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1, + Type *Ty2) const { return 1; } @@ -191,17 +291,15 @@ unsigned VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { - // From this stage we look at the legalized type. - std::pair LT = + std::pair LT = getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src)); + // Assume that all loads of legal types cost 1. return LT.first; } unsigned VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const { - std::pair LT = - getTypeLegalizationCost(Tp->getContext(), TLI->getValueType(Tp)); - return LT.first; + return TLI->getNumRegisters(Tp->getContext(), TLI->getValueType(Tp)); } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index e47baf8908..1773812da2 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -108,7 +108,7 @@ public: createEmptyLoop(Legal); /// Widen each instruction in the old loop to a new one in the new loop. /// Use the Legality module to find the induction and reduction variables. - vectorizeLoop(Legal); + vectorizeLoop(Legal); // register the new loop. cleanup(); } @@ -254,6 +254,9 @@ public: /// This check allows us to vectorize A[idx] into a wide load/store. bool isConsecutiveGep(Value *Ptr); + /// Returns true if this instruction will remain scalar after vectorization. + bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);} + private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count @@ -291,6 +294,9 @@ private: /// Allowed outside users. This holds the reduction /// vars which can be accessed from outside the loop. SmallPtrSet AllowedExit; + /// This set holds the variables which are known to be uniform after + /// vectorization. + SmallPtrSet Uniforms; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -1177,9 +1183,40 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { return false; } - // If the memory dependencies do not prevent us from - // vectorizing, then vectorize. - return canVectorizeMemory(BB); + // Don't vectorize if the memory dependencies do not allow vectorization. + if (!canVectorizeMemory(BB)) + return false; + + // We now know that the loop is vectorizable! + // Collect variables that will remain uniform after vectorization. + std::vector Worklist; + + // Start with the conditional branch and walk up the block. + Worklist.push_back(BB.getTerminator()->getOperand(0)); + + while (Worklist.size()) { + Instruction *I = dyn_cast(Worklist.back()); + Worklist.pop_back(); + // Look at instructions inside this block. + if (!I) continue; + if (I->getParent() != &BB) continue; + + // Stop when reaching PHI nodes. + if (isa(I)) { + assert(I == Induction && "Found a uniform PHI that is not the induction"); + break; + } + + // This is a known uniform. + Uniforms.insert(I); + + // Insert all operands. + for (int i=0, Op = I->getNumOperands(); i < Op; ++i) { + Worklist.push_back(I->getOperand(i)); + } + } + + return true; } bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { @@ -1484,9 +1521,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { assert(VTTI && "Invalid vector target transformation info"); + // If we know that this instruction will remain uniform, check the cost of + // the scalar version. + if (Legal->isUniformAfterVectorization(I)) + VF = 1; + Type *RetTy = I->getType(); Type *VectorTy = ToVectorTy(RetTy, VF); + // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { case Instruction::GetElementPtr: @@ -1495,7 +1538,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { // generate vector geps. return 0; case Instruction::Br: { - return VTTI->getInstrCost(I->getOpcode()); + return VTTI->getCFInstrCost(I->getOpcode()); } case Instruction::PHI: return 0; @@ -1517,7 +1560,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - return VTTI->getInstrCost(I->getOpcode(), VectorTy); + return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy); } case Instruction::Select: { SelectInst *SI = cast(I); @@ -1527,13 +1570,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { if (ScalarCond) CondTy = VectorType::get(CondTy, VF); - return VTTI->getInstrCost(I->getOpcode(), VectorTy, CondTy); + return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); VectorTy = ToVectorTy(ValTy, VF); - return VTTI->getInstrCost(I->getOpcode(), VectorTy); + return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy); } case Instruction::Store: { StoreInst *SI = cast(I); @@ -1602,7 +1645,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::FPTrunc: case Instruction::BitCast: { Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF); - return VTTI->getInstrCost(I->getOpcode(), VectorTy, SrcVecTy); + return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); } default: { // We are scalarizing the instruction. Return the cost of the scalar diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll index 628f9912c8..40e660855b 100644 --- a/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0" @a = common global [2048 x i32] zeroinitializer, align 16 ;CHECK: cost_model_1 -;CHECK: <4 x i32> +;CHECK-NOT: <4 x i32> ;CHECK: ret void define void @cost_model_1() nounwind uwtable noinline ssp { entry: -- cgit v1.2.3-70-g09d2 From f065a8467785015336432e3e6e584798d8b48d8e Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 27 Oct 2012 04:11:32 +0000 Subject: 1. Fix a bug in getTypeConversion. When a *simple* type is split, we need to return the type of the split result. 2. Change the maximum vectorization width from 4 to 8. 3. A test for both. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166864 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 3 ++ lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- test/Transforms/LoopVectorize/X86/gcc-examples.ll | 62 +++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/LoopVectorize/X86/gcc-examples.ll (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 9d0aeaa356..13f80fda3d 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -1980,6 +1980,9 @@ public: ValueTypeActions.getTypeAction(NVT.getSimpleVT()) != TypePromoteInteger) && "Promote may not follow Expand or Promote"); + if (LA == TypeSplitVector) + NVT = EVT::getVectorVT(Context, VT.getVectorElementType(), + VT.getVectorNumElements() / 2); return LegalizeKind(LA, NVT); } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 1773812da2..be197db956 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -317,7 +317,7 @@ public: /// Returns the most profitable vectorization factor for the loop that is /// smaller or equal to the VF argument. This method checks every power /// of two up to VF. - unsigned findBestVectorizationFactor(unsigned VF = 4); + unsigned findBestVectorizationFactor(unsigned VF = 8); private: /// Returns the expected execution cost. The unit of the cost does diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll new file mode 100644 index 0000000000..e7a63c9316 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll @@ -0,0 +1,62 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 + +; Select VF = 8; +;CHECK: @example1 +;CHECK: load <8 x i32> +;CHECK: add <8 x i32> +;CHECK: store <8 x i32> +;CHECK: ret void +define void @example1() nounwind uwtable ssp { + br label %1 + +;