diff options
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r-- | lib/Transforms/Utils/IntegerDivision.cpp | 122 | ||||
-rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp | 280 | ||||
-rw-r--r-- | lib/Transforms/Utils/ValueMapper.cpp | 2 |
3 files changed, 315 insertions, 89 deletions
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 9d630349ab..55227e2714 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -23,11 +23,69 @@ using namespace llvm; +/// Generate code to compute the remainder of two signed integers. Returns the +/// remainder, which will have the sign of the dividend. Builder's insert point +/// should be pointing where the caller wants code generated, e.g. at the srem +/// instruction. This will generate a urem in the process, and Builder's insert +/// point will be pointing at the uren (if present, i.e. not folded), ready to +/// be expanded if the user wishes +static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + ConstantInt *ThirtyOne = Builder.getInt32(31); + + // ; %dividend_sgn = ashr i32 %dividend, 31 + // ; %divisor_sgn = ashr i32 %divisor, 31 + // ; %dvd_xor = xor i32 %dividend, %dividend_sgn + // ; %dvs_xor = xor i32 %divisor, %divisor_sgn + // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn + // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn + // ; %urem = urem i32 %dividend, %divisor + // ; %xored = xor i32 %urem, %dividend_sgn + // ; %srem = sub i32 %xored, %dividend_sgn + Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne); + Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne); + Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); + Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); + Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); + Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign); + Value *URem = Builder.CreateURem(UDividend, UDivisor); + Value *Xored = Builder.CreateXor(URem, DividendSign); + Value *SRem = Builder.CreateSub(Xored, DividendSign); + + if (Instruction *URemInst = dyn_cast<Instruction>(URem)) + Builder.SetInsertPoint(URemInst); + + return SRem; +} + + +/// Generate code to compute the remainder of two unsigned integers. Returns the +/// remainder. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the urem instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes +static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Remainder = Dividend - Quotient*Divisor + + // ; %quotient = udiv i32 %dividend, %divisor + // ; %product = mul i32 %divisor, %quotient + // ; %remainder = sub i32 %dividend, %product + Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); + Value *Product = Builder.CreateMul(Divisor, Quotient); + Value *Remainder = Builder.CreateSub(Dividend, Product); + + if (Instruction *UDiv = dyn_cast<Instruction>(Quotient)) + Builder.SetInsertPoint(UDiv); + + return Remainder; +} + /// Generate code to divide two signed integers. Returns the quotient, rounded -/// towards 0. Builder's insert point should be pointing at the sdiv -/// instruction. This will generate a udiv in the process, and Builder's insert -/// point will be pointing at the udiv (if present, i.e. not folded), ready to -/// be expanded if the user wishes. +/// towards 0. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the sdiv instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes. static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // Implementation taken from compiler-rt's __divsi3 @@ -62,8 +120,8 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, } /// Generates code to divide two unsigned scalar 32-bit integers. Returns the -/// quotient, rounded towards 0. Builder's insert point should be pointing at -/// the udiv instruction. +/// quotient, rounded towards 0. Builder's insert point should be pointing where +/// the caller wants code generated, e.g. at the udiv instruction. static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // The basic algorithm can be found in the compiler-rt project's @@ -265,6 +323,56 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, return Q_5; } +/// Generate code to calculate the remainder of two integers, replacing Rem with +/// the generated code. This currently generates code using the udiv expansion, +/// but future work includes generating more specialized code, e.g. when more +/// information about the operands are known. Currently only implements 32bit +/// scalar division (due to udiv's limitation), but future work is removing this +/// limitation. +/// +/// @brief Replace Rem with generated code. +bool llvm::expandRemainder(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + IRBuilder<> Builder(Rem); + + // First prepare the sign if it's a signed remainder + if (Rem->getOpcode() == Instruction::SRem) { + Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // If we didn't actually generate a udiv instruction, we're done + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + if (!BO || BO->getOpcode() != Instruction::URem) + return true; + + Rem = BO; + } + + Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), + Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // Expand the udiv + if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) { + assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?"); + expandDivision(UDiv); + } + + return true; +} + + /// Generate code to divide two integers, replacing Div with the generated /// code. This currently generates code similarly to compiler-rt's /// implementations, but future work includes generating more specialized code @@ -287,7 +395,7 @@ bool llvm::expandDivision(BinaryOperator *Div) { if (Div->getOpcode() == Instruction::SDiv) { // Lower the code to unsigned division, and reset Div to point to the udiv. Value *Quotient = generateSignedDivisionCode(Div->getOperand(0), - Div->getOperand(1), Builder); + Div->getOperand(1), Builder); Div->replaceAllUsesWith(Quotient); Div->dropAllReferences(); Div->eraseFromParent(); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 876ff2c337..065325b7c2 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -58,9 +58,10 @@ static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); -STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); +STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -3240,83 +3241,227 @@ static bool GetCaseResults(SwitchInst *SI, return true; } -/// BuildLookupTable - Build a lookup table with the contents of Results, using -/// DefaultResult to fill the holes in the table. If the table ends up -/// containing the same result in each element, set *SingleResult to that value -/// and return NULL. -static GlobalVariable *BuildLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results, - Constant *DefaultResult, - Constant **SingleResult) { - assert(Results.size() && "Need values to build lookup table"); - assert(TableSize >= Results.size() && "Table needs to hold all values"); +namespace { + /// SwitchLookupTable - This class represents a lookup table that can be used + /// to replace a switch. + class SwitchLookupTable { + public: + /// SwitchLookupTable - Create a lookup table to use as a switch replacement + /// with the contents of Values, using DefaultValue to fill any holes in the + /// table. + SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const TargetData *TD); + + /// BuildLookup - Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// WouldFitInRegister - Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType); + + private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. + Constant *SingleValue; + + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + + // For ArrayKind, this is the array. + GlobalVariable *Array; + }; +} + +SwitchLookupTable::SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const TargetData *TD) { + assert(Values.size() && "Can't build lookup table without values!"); + assert(TableSize >= Values.size() && "Can't fit values in table!"); // If all values in the table are equal, this is that value. - Constant *SameResult = Results.begin()->second; + SingleValue = Values.begin()->second; // Build up the table contents. - std::vector<Constant*> TableContents(TableSize); - for (size_t I = 0, E = Results.size(); I != E; ++I) { - ConstantInt *CaseVal = Results[I].first; - Constant *CaseRes = Results[I].second; - - uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + SmallVector<Constant*, 64> TableContents(TableSize); + for (size_t I = 0, E = Values.size(); I != E; ++I) { + ConstantInt *CaseVal = Values[I].first; + Constant *CaseRes = Values[I].second; + assert(CaseRes->getType() == DefaultValue->getType()); + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) + .getLimitedValue(); TableContents[Idx] = CaseRes; - if (CaseRes != SameResult) - SameResult = NULL; + if (CaseRes != SingleValue) + SingleValue = NULL; } // Fill in any holes in the table with the default result. - if (Results.size() < TableSize) { - for (unsigned i = 0; i < TableSize; ++i) { - if (!TableContents[i]) - TableContents[i] = DefaultResult; + if (Values.size() < TableSize) { + for (uint64_t I = 0; I < TableSize; ++I) { + if (!TableContents[I]) + TableContents[I] = DefaultValue; } - if (DefaultResult != SameResult) - SameResult = NULL; + if (DefaultValue != SingleValue) + SingleValue = NULL; + } + + // If each element in the table contains the same value, we only need to store + // that single value. + if (SingleValue) { + Kind = SingleValueKind; + return; } - // Same result was used in the entire table; just return that. - if (SameResult) { - *SingleResult = SameResult; - return NULL; + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { + IntegerType *IT = cast<IntegerType>(DefaultValue->getType()); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; } - ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize); + // Store the table in an array. + ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); - GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true, - GlobalVariable::PrivateLinkage, - Initializer, - "switch.table"); - GV->setUnnamedAddr(true); - return GV; + Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + Array->setUnnamedAddr(true); + Kind = ArrayKind; +} + +Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { + switch (Kind) { + case SingleValueKind: + return SingleValue; + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul(ShiftAmt, + ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, + "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, + "switch.masked"); + } + case ArrayKind: { + Value *GEPIndices[] = { Builder.getInt32(0), Index }; + Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, + "switch.gep"); + return Builder.CreateLoad(GEP, "switch.load"); + } + } + llvm_unreachable("Unknown lookup table kind!"); +} + +bool SwitchLookupTable::WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType) { + if (!TD) + return false; + const IntegerType *IT = dyn_cast<IntegerType>(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. i8 but all values + // are <= 15, we could try to narrow the type. + + // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. + if (TableSize >= UINT_MAX/IT->getBitWidth()) + return false; + return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + +/// ShouldBuildLookupTable - Determine whether a lookup table should be built +/// for this switch, based on the number of caes, size of the table and the +/// types of the results. +static bool ShouldBuildLookupTable(SwitchInst *SI, + uint64_t TableSize, + const TargetData *TD, + const SmallDenseMap<PHINode*, Type*>& ResultTypes) { + // The table density should be at least 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) + return false; // TableSize overflowed, or mul below might overflow. + if (SI->getNumCases() * 10 >= TableSize * 4) + return true; + + // If each table would fit in a register, we should build it anyway. + for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(), + E = ResultTypes.end(); I != E; ++I) { + if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second)) + return false; + } + return true; } /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + const TargetData* TD) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); // FIXME: Handle unreachable cases. // FIXME: If the switch is too sparse for a lookup table, perhaps we could // split off a dense part and build a lookup table for that. - // FIXME: If the results are all integers and the lookup table would fit in a - // target-legal register, we should store them as a bitmap and use shift/mask - // to look up the result. - // FIXME: This creates arrays of GEPs to constant strings, which means each // GEP needs a runtime relocation in PIC code. We should just build one big // string and lookup indices into that. - // Ignore the switch if the number of cases are too small. + // Ignore the switch if the number of cases is too small. // This is similar to the check when building jump tables in // SelectionDAGBuilder::handleJTSwitchCase. // FIXME: Determine the best cut-off. @@ -3370,33 +3515,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, } APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); - // The table density should be at lest 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. - // Be careful to avoid overlow in the density computation. - if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) - return false; uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - if (SI->getNumCases() * 10 < TableSize * 4) + if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes)) return false; - // Build the lookup tables. - SmallDenseMap<PHINode*, GlobalVariable*> LookupTables; - SmallDenseMap<PHINode*, Constant*> SingleResults; - - Module &Mod = *CommonDest->getParent()->getParent(); - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - - Constant *SingleResult = NULL; - LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, - ResultLists[PHI], DefaultResults[PHI], - &SingleResult); - SingleResults[PHI] = SingleResult; - } - // Create the BB that does the lookups. + Module &Mod = *CommonDest->getParent()->getParent(); BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), @@ -3414,19 +3538,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); bool ReturnedEarly = false; - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - // There was a single result for this phi; just use that. - if (Constant *SingleResult = SingleResults[PHI]) { - PHI->addIncoming(SingleResult, LookupBB); - continue; - } + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], + DefaultResults[PHI], TD); - Value *GEPIndices[] = { Builder.getInt32(0), TableIndex }; - Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices, - "switch.gep"); - Value *Result = Builder.CreateLoad(GEP, "switch.load"); + Value *Result = Table.BuildLookup(TableIndex, Builder); // If the result is used to return immediately from the function, we want to // do that right here. @@ -3494,7 +3612,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; - if (SwitchToLookupTable(SI, Builder)) + if (SwitchToLookupTable(SI, Builder, TD)) return SimplifyCFG(BB) | true; return false; diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index fc2538db64..a30b09321b 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -21,7 +21,7 @@ using namespace llvm; // Out of line method to get vtable etc for class. -void ValueMapTypeRemapper::Anchor() {} +void ValueMapTypeRemapper::anchor() {} Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper) { |