diff options
| author | Chris Lattner <sabre@nondot.org> | 2004-05-13 07:40:27 +0000 | 
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2004-05-13 07:40:27 +0000 | 
| commit | cb2fd557eee69df671d43d8adf74ffe2a6d0cc2d (patch) | |
| tree | 4566aeabc1673c20eb62ea337894df7ab60ff1c4 /lib/Target/X86/InstSelectSimple.cpp | |
| parent | 042896206475428d486073e1201b2a36713129ef (diff) | |
Substantially improve code generation for address exposed locals (aka fixed
sized allocas in the entry block).  Instead of generating code like this:
entry:
  reg1024 = ESP+1234
... (much later)
  *reg1024 = 17
Generate code that looks like this:
entry:
  (no code generated)
... (much later)
  t = ESP+1234
  *t = 17
The advantage being that we DRAMATICALLY reduce the register pressure for these
silly temporaries (they were all being spilled to the stack, resulting in very
silly code).  This is actually a manual implementation of rematerialization :)
I have a patch to fold the alloca address computation into loads & stores, which
will make this much better still, but just getting this right took way too much time
and I'm sleepy.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@13554 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/InstSelectSimple.cpp')
| -rw-r--r-- | lib/Target/X86/InstSelectSimple.cpp | 165 | 
1 files changed, 101 insertions, 64 deletions
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp index cba90bbb96..9f41f070cb 100644 --- a/lib/Target/X86/InstSelectSimple.cpp +++ b/lib/Target/X86/InstSelectSimple.cpp @@ -86,6 +86,10 @@ namespace {      // MBBMap - Mapping between LLVM BB -> Machine BB      std::map<const BasicBlock*, MachineBasicBlock*> MBBMap; +    // AllocaMap - Mapping from fixed sized alloca instructions to the +    // FrameIndex for the alloca. +    std::map<AllocaInst*, unsigned> AllocaMap; +      ISel(TargetMachine &tm) : TM(tm), F(0), BB(0) {}      /// runOnFunction - Top level implementation of instruction selection for @@ -122,6 +126,7 @@ namespace {        RegMap.clear();        MBBMap.clear(); +      AllocaMap.clear();        F = 0;        // We always build a machine code representation for the function        return true; @@ -350,9 +355,7 @@ namespace {        return F->getSSARegMap()->createVirtualRegister(RC);      } -    /// getReg - This method turns an LLVM value into a register number.  This -    /// is guaranteed to produce the same register number for a particular value -    /// every time it is queried. +    /// getReg - This method turns an LLVM value into a register number.      ///      unsigned getReg(Value &V) { return getReg(&V); }  // Allow references      unsigned getReg(Value *V) { @@ -361,36 +364,86 @@ namespace {        return getReg(V, BB, It);      }      unsigned getReg(Value *V, MachineBasicBlock *MBB, -                    MachineBasicBlock::iterator IPt) { -      // If this operand is a constant, emit the code to copy the constant into -      // the register here... -      // -      if (Constant *C = dyn_cast<Constant>(V)) { -        unsigned Reg = makeAnotherReg(V->getType()); -        copyConstantToRegister(MBB, IPt, C, Reg); -        return Reg; -      } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { -        unsigned Reg = makeAnotherReg(V->getType()); -        // Move the address of the global into the register -        BuildMI(*MBB, IPt, X86::MOV32ri, 1, Reg).addGlobalAddress(GV); -        return Reg; -      } else if (CastInst *CI = dyn_cast<CastInst>(V)) { -        // Do not emit noop casts at all. -        if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType())) -          return getReg(CI->getOperand(0), MBB, IPt); -      } - -      unsigned &Reg = RegMap[V]; -      if (Reg == 0) { -        Reg = makeAnotherReg(V->getType()); -        RegMap[V] = Reg; -      } +                    MachineBasicBlock::iterator IPt); -      return Reg; -    } +    /// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca +    /// that is to be statically allocated with the initial stack frame +    /// adjustment. +    unsigned getFixedSizedAllocaFI(AllocaInst *AI);    };  } +/// dyn_castFixedAlloca - If the specified value is a fixed size alloca +/// instruction in the entry block, return it.  Otherwise, return a null +/// pointer. +static AllocaInst *dyn_castFixedAlloca(Value *V) { +  if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { +    BasicBlock *BB = AI->getParent(); +    if (isa<ConstantUInt>(AI->getArraySize()) && BB ==&BB->getParent()->front()) +      return AI; +  } +  return 0; +} + +/// getReg - This method turns an LLVM value into a register number. +/// +unsigned ISel::getReg(Value *V, MachineBasicBlock *MBB, +                      MachineBasicBlock::iterator IPt) { +  // If this operand is a constant, emit the code to copy the constant into +  // the register here... +  // +  if (Constant *C = dyn_cast<Constant>(V)) { +    unsigned Reg = makeAnotherReg(V->getType()); +    copyConstantToRegister(MBB, IPt, C, Reg); +    return Reg; +  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { +    unsigned Reg = makeAnotherReg(V->getType()); +    // Move the address of the global into the register +    BuildMI(*MBB, IPt, X86::MOV32ri, 1, Reg).addGlobalAddress(GV); +    return Reg; +  } else if (CastInst *CI = dyn_cast<CastInst>(V)) { +    // Do not emit noop casts at all. +    if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType())) +      return getReg(CI->getOperand(0), MBB, IPt); +  } else if (AllocaInst *AI = dyn_castFixedAlloca(V)) { +    // If the alloca address couldn't be folded into the instruction addressing, +    // emit an explicit LEA as appropriate. +    unsigned Reg = makeAnotherReg(V->getType()); +    unsigned FI = getFixedSizedAllocaFI(AI); +    addFrameReference(BuildMI(*MBB, IPt, X86::LEA32r, 4, Reg), FI); +    return Reg; +  } + +  unsigned &Reg = RegMap[V]; +  if (Reg == 0) { +    Reg = makeAnotherReg(V->getType()); +    RegMap[V] = Reg; +  } + +  return Reg; +} + +/// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca +/// that is to be statically allocated with the initial stack frame +/// adjustment. +unsigned ISel::getFixedSizedAllocaFI(AllocaInst *AI) { +  // Already computed this? +  std::map<AllocaInst*, unsigned>::iterator I = AllocaMap.lower_bound(AI); +  if (I != AllocaMap.end() && I->first == AI) return I->second; + +  const Type *Ty = AI->getAllocatedType(); +  ConstantUInt *CUI = cast<ConstantUInt>(AI->getArraySize()); +  unsigned TySize = TM.getTargetData().getTypeSize(Ty); +  TySize *= CUI->getValue();   // Get total allocated size... +  unsigned Alignment = TM.getTargetData().getTypeAlignment(Ty); +       +  // Create a new stack object using the frame manager... +  int FrameIdx = F->getFrameInfo()->CreateStackObject(TySize, Alignment); +  AllocaMap.insert(I, std::make_pair(AI, FrameIdx)); +  return FrameIdx; +} + +  /// copyConstantToRegister - Output the instructions required to put the  /// specified constant into the specified register.  /// @@ -630,27 +683,24 @@ void ISel::SelectPHINodes() {            // If this is a constant or GlobalValue, we may have to insert code            // into the basic block to compute it into a virtual register. -          if (isa<Constant>(Val) || isa<GlobalValue>(Val)) { -            if (isa<ConstantExpr>(Val)) { -              // Because we don't want to clobber any values which might be in -              // physical registers with the computation of this constant (which -              // might be arbitrarily complex if it is a constant expression), -              // just insert the computation at the top of the basic block. -              MachineBasicBlock::iterator PI = PredMBB->begin(); -               -              // Skip over any PHI nodes though! -              while (PI != PredMBB->end() && PI->getOpcode() == X86::PHI) -                ++PI; -               -              ValReg = getReg(Val, PredMBB, PI); -            } else { -              // Simple constants get emitted at the end of the basic block, -              // before any terminator instructions.  We "know" that the code to -              // move a constant into a register will never clobber any flags. -              ValReg = getReg(Val, PredMBB, PredMBB->getFirstTerminator()); -            } +          if ((isa<Constant>(Val) && !isa<ConstantExpr>(Val)) || +              isa<GlobalValue>(Val)) { +            // Simple constants get emitted at the end of the basic block, +            // before any terminator instructions.  We "know" that the code to +            // move a constant into a register will never clobber any flags. +            ValReg = getReg(Val, PredMBB, PredMBB->getFirstTerminator());            } else { -            ValReg = getReg(Val); +            // Because we don't want to clobber any values which might be in +            // physical registers with the computation of this constant (which +            // might be arbitrarily complex if it is a constant expression), +            // just insert the computation at the top of the basic block. +            MachineBasicBlock::iterator PI = PredMBB->begin(); +             +            // Skip over any PHI nodes though! +            while (PI != PredMBB->end() && PI->getOpcode() == X86::PHI) +              ++PI; +             +            ValReg = getReg(Val, PredMBB, PI);            }            // Remember that we inserted a value for this PHI for this predecessor @@ -3582,25 +3632,12 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,  /// frame manager, otherwise do it the hard way.  ///  void ISel::visitAllocaInst(AllocaInst &I) { +  if (dyn_castFixedAlloca(&I)) return; +    // Find the data size of the alloca inst's getAllocatedType.    const Type *Ty = I.getAllocatedType();    unsigned TySize = TM.getTargetData().getTypeSize(Ty); -  // If this is a fixed size alloca in the entry block for the function, -  // statically stack allocate the space. -  // -  if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(I.getArraySize())) { -    if (I.getParent() == I.getParent()->getParent()->begin()) { -      TySize *= CUI->getValue();   // Get total allocated size... -      unsigned Alignment = TM.getTargetData().getTypeAlignment(Ty); -       -      // Create a new stack object using the frame manager... -      int FrameIdx = F->getFrameInfo()->CreateStackObject(TySize, Alignment); -      addFrameReference(BuildMI(BB, X86::LEA32r, 5, getReg(I)), FrameIdx); -      return; -    } -  } -      // Create a register to hold the temporary result of multiplying the type size    // constant by the variable amount.    unsigned TotalSizeReg = makeAnotherReg(Type::UIntTy);  | 
