diff options
author | Chris Lattner <sabre@nondot.org> | 2003-01-13 00:32:26 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2003-01-13 00:32:26 +0000 |
commit | 3e130a2a770d8a0bd968878d8fb0ffe7845fb606 (patch) | |
tree | 3b7233e9c9e0a841a427a15dbcc358342417b81c /lib/Target/X86/InstSelectSimple.cpp | |
parent | 019a7c801b491504868ad105016fc6ebbb09ff5d (diff) |
* Adjust to use new interfaces, eliminating CurReg stuff
* Support arbitrary FP constants
* Fix bugs in frame layout for function calls and incoming arguments
* Insert copies for constant arguments to PHI nodes into the BOTTOM of
  predecessor blocks, not the top.
* Implement _floating point_ support: setcc, return, load, store, cast
* Fix several bugs in the cast instruction
* Abstract out call emission and load/store for FP
* Implement malloc/free without previous lowering pass.
* Make use of new forms of MachineOperand
* Implement _long_ support!
* Fix many bugs in FP support
* Change branch from je/jne to je/jmp
* Simplify code generated for GEP instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@5223 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/InstSelectSimple.cpp')
-rw-r--r-- | lib/Target/X86/InstSelectSimple.cpp | 1212 |
1 files changed, 802 insertions, 410 deletions
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp index ad8501d798..43f25325e8 100644 --- a/lib/Target/X86/InstSelectSimple.cpp +++ b/lib/Target/X86/InstSelectSimple.cpp @@ -1,6 +1,6 @@ //===-- InstSelectSimple.cpp - A simple instruction selector for x86 ------===// // -// This file defines a simple peephole instruction selector for the x86 platform +// This file defines a simple peephole instruction selector for the x86 target // //===----------------------------------------------------------------------===// @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/SSARegMap.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Target/MRegisterInfo.h" @@ -59,14 +60,12 @@ namespace { MachineFunction *F; // The function we are compiling into MachineBasicBlock *BB; // The current MBB we are compiling - unsigned CurReg; std::map<Value*, unsigned> RegMap; // Mapping between Val's and SSA Regs // MBBMap - Mapping between LLVM BB -> Machine BB std::map<const BasicBlock*, MachineBasicBlock*> MBBMap; - ISel(TargetMachine &tm) - : TM(tm), F(0), BB(0), CurReg(MRegisterInfo::FirstVirtualRegister) {} + ISel(TargetMachine &tm) : TM(tm), F(0), BB(0) {} /// runOnFunction - Top level implementation of instruction selection for /// the entire function. @@ -89,7 +88,6 @@ namespace { RegMap.clear(); MBBMap.clear(); - CurReg = MRegisterInfo::FirstVirtualRegister; F = 0; return false; // We never modify the LLVM itself. 
} @@ -125,6 +123,14 @@ namespace { // Control flow operators void visitReturnInst(ReturnInst &RI); void visitBranchInst(BranchInst &BI); + + struct ValueRecord { + unsigned Reg; + const Type *Ty; + ValueRecord(unsigned R, const Type *T) : Reg(R), Ty(T) {} + }; + void doCall(const ValueRecord &Ret, MachineInstr *CallMI, + const std::vector<ValueRecord> &Args); void visitCallInst(CallInst &I); // Arithmetic operators @@ -132,8 +138,8 @@ namespace { void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); } void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); } void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI, - unsigned destReg, const Type *resultType, - unsigned op0Reg, unsigned op1Reg); + unsigned DestReg, const Type *DestTy, + unsigned Op0Reg, unsigned Op1Reg); void visitMul(BinaryOperator &B); void visitDiv(BinaryOperator &B) { visitDivRem(B); } @@ -155,15 +161,16 @@ namespace { void visitSetGE(SetCondInst &I) { visitSetCCInst(I, 5); } // Memory Instructions + MachineInstr *doFPLoad(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &MBBI, + const Type *Ty, unsigned DestReg); void visitLoadInst(LoadInst &I); + void doFPStore(const Type *Ty, unsigned DestAddrReg, unsigned SrcReg); void visitStoreInst(StoreInst &I); void visitGetElementPtrInst(GetElementPtrInst &I); void visitAllocaInst(AllocaInst &I); - - // We assume that by this point, malloc instructions have been - // lowered to calls, and dlsym will magically find malloc for us. - void visitMallocInst(MallocInst &I) { visitInstruction (I); } - void visitFreeInst(FreeInst &I) { visitInstruction(I); } + void visitMallocInst(MallocInst &I); + void visitFreeInst(FreeInst &I); // Other operators void visitShiftInst(ShiftInst &I); @@ -176,11 +183,16 @@ namespace { } /// promote32 - Make a value 32-bits wide, and put it somewhere. 
- void promote32 (const unsigned targetReg, Value *v); + /// + void promote32(unsigned targetReg, const ValueRecord &VR); + + /// EmitByteSwap - Byteswap SrcReg into DestReg. + /// + void EmitByteSwap(unsigned DestReg, unsigned SrcReg, unsigned Class); - // emitGEPOperation - Common code shared between visitGetElementPtrInst and - // constant expression GEP support. - // + /// emitGEPOperation - Common code shared between visitGetElementPtrInst and + /// constant expression GEP support. + /// void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator&IP, Value *Src, User::op_iterator IdxBegin, User::op_iterator IdxEnd, unsigned TargetReg); @@ -192,14 +204,28 @@ namespace { MachineBasicBlock::iterator &MBBI, Constant *C, unsigned Reg); - /// makeAnotherReg - This method returns the next register number - /// we haven't yet used. + /// makeAnotherReg - This method returns the next register number we haven't + /// yet used. + /// + /// Long values are handled somewhat specially. They are always allocated + /// as pairs of 32 bit integer values. The register number returned is the + /// lower 32 bits of the long value, and the regNum+1 is the upper 32 bits + /// of the long value. + /// unsigned makeAnotherReg(const Type *Ty) { + if (Ty == Type::LongTy || Ty == Type::ULongTy) { + const TargetRegisterClass *RC = + TM.getRegisterInfo()->getRegClassForType(Type::IntTy); + // Create the lower part + F->getSSARegMap()->createVirtualRegister(RC); + // Create the upper part. + return F->getSSARegMap()->createVirtualRegister(RC)-1; + } + // Add the mapping of regnumber => reg class to MachineFunction const TargetRegisterClass *RC = TM.getRegisterInfo()->getRegClassForType(Ty); - F->getSSARegMap()->addRegMap(CurReg, RC); - return CurReg++; + return F->getSSARegMap()->createVirtualRegister(RC); } /// getReg - This method turns an LLVM value into a register number. 
This @@ -228,7 +254,7 @@ namespace { RegMap.erase(V); // Assign a new name to this constant if ref'd again } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { // Move the address of the global into the register - BMI(MBB, IPt, X86::MOVir32, 1, Reg).addReg(GV); + BMI(MBB, IPt, X86::MOVir32, 1, Reg).addGlobalAddress(GV); RegMap.erase(V); // Assign a new name to this address if ref'd again } @@ -259,9 +285,9 @@ static inline TypeClass getClass(const Type *Ty) { case Type::FloatTyID: case Type::DoubleTyID: return cFP; // Floating Point is #3 + case Type::LongTyID: - case Type::ULongTyID: //return cLong; // Longs are class #3 - return cInt; // FIXME: LONGS ARE TREATED AS INTS! + case Type::ULongTyID: return cLong; // Longs are class #4 default: assert(0 && "Invalid type to getClass!"); return cByte; // not reached @@ -294,6 +320,20 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB, if (C->getType()->isIntegral()) { unsigned Class = getClassB(C->getType()); + + if (Class == cLong) { + // Copy the value into the register pair. 
+ uint64_t Val; + if (C->getType()->isSigned()) + Val = cast<ConstantSInt>(C)->getValue(); + else + Val = cast<ConstantUInt>(C)->getValue(); + + BMI(MBB, IP, X86::MOVir32, 1, R).addZImm(Val & 0xFFFFFFFF); + BMI(MBB, IP, X86::MOVir32, 1, R+1).addZImm(Val >> 32); + return; + } + assert(Class <= cInt && "Type not handled yet!"); static const unsigned IntegralOpcodeTab[] = { @@ -304,7 +344,7 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB, BMI(MBB, IP, X86::MOVir8, 1, R).addZImm(C == ConstantBool::True); } else if (C->getType()->isSigned()) { ConstantSInt *CSI = cast<ConstantSInt>(C); - BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addSImm(CSI->getValue()); + BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addZImm(CSI->getValue()); } else { ConstantUInt *CUI = cast<ConstantUInt>(C); BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addZImm(CUI->getValue()); @@ -316,8 +356,10 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB, else if (Value == +1.0) BMI(MBB, IP, X86::FLD1, 0, R); else { - std::cerr << "Cannot load constant '" << Value << "'!\n"; - assert(0); + // Otherwise we need to spill the constant to memory... + MachineConstantPool *CP = F->getConstantPool(); + unsigned CPI = CP->getConstantPoolIndex(CFP); + addConstantPoolReference(doFPLoad(MBB, IP, CFP->getType(), R), CPI); } } else if (isa<ConstantPointerNull>(C)) { @@ -340,19 +382,17 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) { // X86, the stack frame looks like this: // // [ESP] -- return address - // [ESP + 4] -- first argument (leftmost lexically) if four bytes in size - // [ESP + 8] -- second argument, if four bytes in size + // [ESP + 4] -- first argument (leftmost lexically) + // [ESP + 8] -- second argument, if first argument is four bytes in size // ... 
// - unsigned ArgOffset = 0; + unsigned ArgOffset = 4; MachineFrameInfo *MFI = F->getFrameInfo(); for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) { unsigned Reg = getReg(*I); - ArgOffset += 4; // Each argument takes at least 4 bytes on the stack... int FI; // Frame object index - switch (getClassB(I->getType())) { case cByte: FI = MFI->CreateFixedObject(1, ArgOffset); @@ -366,6 +406,12 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) { FI = MFI->CreateFixedObject(4, ArgOffset); addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg), FI); break; + case cLong: + FI = MFI->CreateFixedObject(8, ArgOffset); + addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg), FI); + addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg+1), FI, 4); + ArgOffset += 4; // longs require 4 additional bytes + break; case cFP: unsigned Opcode; if (I->getType() == Type::FloatTy) { @@ -373,14 +419,15 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) { FI = MFI->CreateFixedObject(4, ArgOffset); } else { Opcode = X86::FLDr64; - ArgOffset += 4; // doubles require 4 additional bytes FI = MFI->CreateFixedObject(8, ArgOffset); + ArgOffset += 4; // doubles require 4 additional bytes } addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI); break; default: assert(0 && "Unhandled argument type!"); } + ArgOffset += 4; // Each argument takes at least 4 bytes on the stack... } } @@ -390,6 +437,7 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) { /// the current one. /// void ISel::SelectPHINodes() { + const MachineInstrInfo &MII = TM.getInstrInfo(); const Function &LF = *F->getFunction(); // The LLVM function... for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) { const BasicBlock *BB = I; @@ -399,9 +447,17 @@ void ISel::SelectPHINodes() { unsigned NumPHIs = 0; for (BasicBlock::const_iterator I = BB->begin(); PHINode *PN = (PHINode*)dyn_cast<PHINode>(&*I); ++I) { + // Create a new machine instr PHI node, and insert it. 
- MachineInstr *MI = BuildMI(X86::PHI, PN->getNumOperands(), getReg(*PN)); - MBB->insert(MBB->begin()+NumPHIs++, MI); // Insert it at the top of the BB + unsigned PHIReg = getReg(*PN); + MachineInstr *PhiMI = BuildMI(X86::PHI, PN->getNumOperands(), PHIReg); + MBB->insert(MBB->begin()+NumPHIs++, PhiMI); + + MachineInstr *LongPhiMI = 0; + if (PN->getType() == Type::LongTy || PN->getType() == Type::ULongTy) { + LongPhiMI = BuildMI(X86::PHI, PN->getNumOperands(), PHIReg+1); + MBB->insert(MBB->begin()+NumPHIs++, LongPhiMI); + } for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { MachineBasicBlock *PredMBB = MBBMap[PN->getIncomingBlock(i)]; @@ -410,13 +466,17 @@ void ISel::SelectPHINodes() { // available in a virtual register, insert the computation code into // PredMBB // - // FIXME: This should insert the code into the BOTTOM of the block, not - // the top of the block. This just makes for huge live ranges... - MachineBasicBlock::iterator PI = PredMBB->begin(); - while ((*PI)->getOpcode() == X86::PHI) ++PI; - - MI->addRegOperand(getReg(PN->getIncomingValue(i), PredMBB, PI)); - MI->addMachineBasicBlockOperand(PredMBB); + MachineBasicBlock::iterator PI = PredMBB->end(); + while (PI != PredMBB->begin() && + MII.isTerminatorInstr((*(PI-1))->getOpcode())) + --PI; + unsigned ValReg = getReg(PN->getIncomingValue(i), PredMBB, PI); + PhiMI->addRegOperand(ValReg); + PhiMI->addMachineBasicBlockOperand(PredMBB); + if (LongPhiMI) { + LongPhiMI->addRegOperand(ValReg+1); + LongPhiMI->addMachineBasicBlockOperand(PredMBB); + } } } } @@ -426,102 +486,108 @@ void ISel::SelectPHINodes() { /// SetCC instructions - Here we just emit boilerplate code to set a byte-sized /// register, then move it to wherever the result should be. 
-/// We handle FP setcc instructions by pushing them, doing a -/// compare-and-pop-twice, and then copying the concodes to the main -/// processor's concodes (I didn't make this up, it's in the Intel manual) /// void ISel::visitSetCCInst(SetCondInst &I, unsigned OpNum) { // The arguments are already supposed to be of the same type. const Type *CompTy = I.getOperand(0)->getType(); + bool isSigned = CompTy->isSigned(); unsigned reg1 = getReg(I.getOperand(0)); unsigned reg2 = getReg(I.getOperand(1)); + unsigned DestReg = getReg(I); + + // LLVM -> X86 signed X86 unsigned + // ----- ---------- ------------ + // seteq -> sete sete + // setne -> setne setne + // setlt -> setl setb + // setgt -> setg seta + // setle -> setle setbe + // setge -> setge setae + static const unsigned OpcodeTab[2][6] = { + {X86::SETEr, X86::SETNEr, X86::SETBr, X86::SETAr, X86::SETBEr, X86::SETAEr}, + {X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGr, X86::SETLEr, X86::SETGEr}, + }; - unsigned Class = getClass(CompTy); + unsigned Class = getClassB(CompTy); switch (Class) { + default: assert(0 && "Unknown type class!"); // Emit: cmp <var1>, <var2> (do the comparison). We can // compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with // 32-bit. case cByte: - BuildMI (BB, X86::CMPrr8, 2).addReg (reg1).addReg (reg2); + BuildMI(BB, X86::CMPrr8, 2).addReg(reg1).addReg(reg2); break; case cShort: - BuildMI (BB, X86::CMPrr16, 2).addReg (reg1).addReg (reg2); + BuildMI(BB, X86::CMPrr16, 2).addReg(reg1).addReg(reg2); break; case cInt: - BuildMI (BB, X86::CMPrr32, 2).addReg (reg1).addReg (reg2); - break; - -#if 0 - // Push the variables on the stack with fldl opcodes. 
- // FIXME: assuming var1, var2 are in memory, if not, spill to - // stack first - case cFP: // Floats - BuildMI (BB, X86::FLDr32, 1).addReg (reg1); - BuildMI (BB, X86::FLDr32, 1).addReg (reg2); + BuildMI(BB, X86::CMPrr32, 2).addReg(reg1).addReg(reg2); break; - case cFP (doubles): // Doubles - BuildMI (BB, X86::FLDr64, 1).addReg (reg1); - BuildMI (BB, X86::FLDr64, 1).addReg (reg2); + case cFP: + BuildMI(BB, X86::FpUCOM, 2).addReg(reg1).addReg(reg2); + BuildMI(BB, X86::FNSTSWr8, 0); + BuildMI(BB, X86::SAHF, 1); + isSigned = false; // Compare with unsigned operators break; -#endif + case cLong: - default: - visitInstruction(I); - } + if (OpNum < 2) { // seteq, setne + unsigned LoTmp = makeAnotherReg(Type::IntTy); + unsigned HiTmp = makeAnotherReg(Type::IntTy); + unsigned FinalTmp = makeAnotherReg(Type::IntTy); + BuildMI(BB, X86::XORrr32, 2, LoTmp).addReg(reg1).addReg(reg2); + BuildMI(BB, X86::XORrr32, 2, HiTmp).addReg(reg1+1).addReg(reg2+1); + BuildMI(BB, X86::ORrr32, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp); + break; // Allow the sete or setne to be generated from flags set by OR + } else { + // Emit a sequence of code which compares the high and low parts once + // each, then uses a conditional move to handle the overflow case. For + // example, a setlt for long would generate code like this: + // + // AL = lo(op1) < lo(op2) // Signedness depends on operands + // BL = hi(op1) < hi(op2) // Always unsigned comparison + // dest = hi(op1) == hi(op2) ? AL : BL; + // -#if 0 - if (CompTy->isFloatingPoint()) { - // (Non-trapping) compare and pop twice. - BuildMI (BB, X86::FUCOMPP, 0); - // Move fp status word (concodes) to ax. - BuildMI (BB, X86::FNSTSWr8, 1, X86::AX); - // Load real concodes from ax. - BuildMI (BB, X86::SAHF, 1).addReg(X86::AH); + // FIXME: This would be much better if we had heirarchical register + // classes! Until then, hardcode registers so that we can deal with their + // aliases (because we don't have conditional byte moves). 
+ // + BuildMI(BB, X86::CMPrr32, 2).addReg(reg1).addReg(reg2); + BuildMI(BB, OpcodeTab[0][OpNum], 0, X86::AL); + BuildMI(BB, X86::CMPrr32, 2).addReg(reg1+1).addReg(reg2+1); + BuildMI(BB, OpcodeTab[isSigned][OpNum], 0, X86::BL); + BuildMI(BB, X86::CMOVErr16, 2, X86::BX).addReg(X86::BX).addReg(X86::AX); + BuildMI(BB, X86::MOVrr8, 1, DestReg).addReg(X86::BL); + return; + } } -#endif - // Emit setOp instruction (extract concode; clobbers ax), - // using the following mapping: - // LLVM -> X86 signed X86 unsigned - // ----- ----- ----- - // seteq -> sete sete - // setne -> setne setne - // setlt -> setl setb - // setgt -> setg seta - // setle -> setle setbe - // setge -> setge setae - - static const unsigned OpcodeTab[2][6] = { - {X86::SETEr, X86::SETNEr, X86::SETBr, X86::SETAr, X86::SETBEr, X86::SETAEr}, - {X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGr, X86::SETLEr, X86::SETGEr}, - }; - - BuildMI(BB, OpcodeTab[CompTy->isSigned()][OpNum], 0, getReg(I)); + BuildMI(BB, OpcodeTab[isSigned][OpNum], 0, DestReg); } /// promote32 - Emit instructions to turn a narrow operand into a 32-bit-wide /// operand, in the specified target register. 
-void ISel::promote32 (unsigned targetReg, Value *v) { - unsigned vReg = getReg(v); - bool isUnsigned = v->getType()->isUnsigned(); - switch (getClass(v->getType())) { +void ISel::promote32(unsigned targetReg, const ValueRecord &VR) { + bool isUnsigned = VR.Ty->isUnsigned(); + switch (getClassB(VR.Ty)) { case cByte: // Extend value into target register (8->32) if (isUnsigned) - BuildMI(BB, X86::MOVZXr32r8, 1, targetReg).addReg(vReg); + BuildMI(BB, X86::MOVZXr32r8, 1, targetReg).addReg(VR.Reg); else - BuildMI(BB, X86::MOVSXr32r8, 1, targetReg).addReg(vReg); + BuildMI(BB, X86::MOVSXr32r8, 1, targetReg).addReg(VR.Reg); break; case cShort: // Extend value into target register (16->32) if (isUnsigned) - BuildMI(BB, X86::MOVZXr32r16, 1, targetReg).addReg(vReg); + BuildMI(BB, X86::MOVZXr32r16, 1, targetReg).addReg(VR.Reg); else - BuildMI(BB, X86::MOVSXr32r16, 1, targetReg).addReg(vReg); + BuildMI(BB, X86::MOVSXr32r16, 1, targetReg).addReg(VR.Reg); break; case cInt: // Move value into target register (32->32) - BuildMI(BB, X86::MOVrr32, 1, targetReg).addReg(vReg); + BuildMI(BB, X86::MOVrr32, 1, targetReg).addReg(VR.Reg); break; default: assert(0 && "Unpromotable operand class in promote32"); @@ -539,27 +605,29 @@ void ISel::promote32 (unsigned targetReg, Value *v) { /// ret long, ulong : Move value into EAX/EDX and return /// ret float/double : Top of FP stack /// -void ISel::visitReturnInst (ReturnInst &I) { +void ISel::visitReturnInst(ReturnInst &I) { if (I.getNumOperands() == 0) { BuildMI(BB, X86::RET, 0); // Just emit a 'ret' instruction return; } Value *RetVal = I.getOperand(0); - switch (getClass(RetVal->getType())) { + unsigned RetReg = getReg(RetVal); + switch (getClassB(RetVal->getType())) { case cByte: // integral return values: extend or move into EAX and return case cShort: case cInt: - promote32(X86::EAX, RetVal); + promote32(X86::EAX, ValueRecord(RetReg, RetVal->getType())); break; case cFP: // Floats & Doubles: Return in ST(0) - BuildMI(BB, X86::FpMOV, 1, 
X86::ST0).addReg(getReg(RetVal)); + BuildMI(BB, X86::FpSETRESULT, 1).addReg(RetReg); break; case cLong: - // ret long: use EAX(least significant 32 bits)/EDX (most - // significant 32)... + BuildMI(BB, X86::MOVrr32, 1, X86::EAX).addReg(RetReg); + BuildMI(BB, X86::MOVrr32, 1, X86::EDX).addReg(RetReg+1); + break; default: - visitInstruction (I); + visitInstruction(I); } // Emit a 'ret' instruction BuildMI(BB, X86::RET, 0); @@ -572,36 +640,33 @@ void ISel::visitReturnInst (ReturnInst &I) { /// void ISel::visitBranchInst(BranchInst &BI) { if (BI.isConditional()) { - BasicBlock *ifTrue = BI.getSuccessor(0); - BasicBlock *ifFalse = BI.getSuccessor(1); - - // Compare condition with zero, followed by jump-if-equal to ifFalse, and - // jump-if-nonequal to ifTrue unsigned condReg = getReg(BI.getCondition()); BuildMI(BB, X86::CMPri8, 2).addReg(condReg).addZImm(0); - BuildMI(BB, X86::JNE, 1).addPCDisp(BI.getSuccessor(0)); BuildMI(BB, X86::JE, 1).addPCDisp(BI.getSuccessor(1)); - } else { // unconditional branch - BuildMI(BB, X86::JMP, 1).addPCDisp(BI.getSuccessor(0)); } + BuildMI(BB, X86::JMP, 1).addPCDisp(BI.getSuccessor(0)); } -/// visitCallInst - Push args on stack and do a procedure call instruction. -void ISel::visitCallInst(CallInst &CI) { + +/// doCall - This emits an abstract call instruction, setting up the arguments +/// and the return value as appropriate. For the actual function call itself, +/// it inserts the specified CallMI instruction into the stream. +/// +void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, + const std::vector<ValueRecord> &Args) { + // Count how many bytes are to be pushed on the stack... 
unsigned NumBytes = 0; - if (CI.getNumOperands() > 1) { - for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i) - switch (getClass(CI.getOperand(i)->getType())) { + if (!Args.empty()) { + for (unsigned i = 0, e = Args.size(); i != e; ++i) + switch (getClassB(Args[i].Ty)) { case cByte: case cShort: case cInt: - NumBytes += 4; - break; + NumBytes += 4; break; case cLong: - NumBytes += 8; - break; + NumBytes += 8; break; case cFP: - NumBytes += CI.getOperand(i)->getType() == Type::FloatTy ? 4 : 8; + NumBytes += Args[i].Ty == Type::FloatTy ? 4 : 8; break; default: assert(0 && "Unknown class!"); } @@ -611,60 +676,60 @@ void ISel::visitCallInst(CallInst &CI) { // Arguments go on the stack in reverse order, as specified by the ABI. unsigned ArgOffset = 0; - for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i) { - Value *Arg = CI.getOperand(i); - switch (getClass(Arg->getType())) { + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + unsigned ArgReg = Args[i].Reg; + switch (getClassB(Args[i].Ty)) { case cByte: case cShort: { // Promote arg to 32 bits wide into a temporary register... unsigned R = makeAnotherReg(Type::UIntTy); - promote32(R, Arg); + promote32(R, Args[i]); addRegOffset(BuildMI(BB, X86::MOVrm32, 5), X86::ESP, ArgOffset).addReg(R); break; } case cInt: addRegOffset(BuildMI(BB, X86::MOVrm32, 5), - X86::ESP, ArgOffset).addReg(getReg(Arg)); + X86::ESP, ArgOffset).addReg(ArgReg); break; - + case cLong: + addRegOffset(BuildMI(BB, X86::MOVrm32, 5), + X86::ESP, ArgOffset).addReg(ArgReg); + addRegOffset(BuildMI(BB, X86::MOVrm32, 5), + X86::ESP, ArgOffset+4).addReg(ArgReg+1); + ArgOffset += 4; // 8 byte entry, not 4. 
+ break; + case cFP: - if (Arg->getType() == Type::FloatTy) { + if (Args[i].Ty == Type::FloatTy) { addRegOffset(BuildMI(BB, X86::FSTr32, 5), - X86::ESP, ArgOffset).addReg(getReg(Arg)); + X86::ESP, ArgOffset).addReg(ArgReg); } else { - assert(Arg->getType() == Type::DoubleTy && "Unknown FP type!"); - ArgOffset += 4; - addRegOffset(BuildMI(BB, X86::FSTr32, 5), - X86::ESP, ArgOffset).addReg(getReg(Arg)); + assert(Args[i].Ty == Type::DoubleTy && "Unknown FP type!"); + addRegOffset(BuildMI(BB, X86::FSTr64, 5), + X86::ESP, ArgOffset).addReg(ArgReg); + ArgOffset += 4; // 8 byte entry, not 4. } break; - default: - // FIXME: long/ulong/float/double args not handled. - visitInstruction(CI); - break; + default: assert(0 && "Unknown class!"); } ArgOffset += 4; } - } - - if (Function *F = CI.getCalledFunction()) { - // Emit a CALL instruction with PC-relative displacement. - BuildMI(BB, X86::CALLpcrel32, 1).addPCDisp(F); } else { - unsigned Reg = getReg(CI.getCalledValue()); - BuildMI(BB, X86::CALLr32, 1).addReg(Reg); + BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addZImm(0); } + BB->push_back(CallMI); + BuildMI(BB, X86::ADJCALLSTACKUP, 1).addZImm(NumBytes); // If there is a return value, scavenge the result from the location the call // leaves it in... 
// - if (CI.getType() != Type::VoidTy) { - unsigned resultTypeClass = getClass(CI.getType()); - switch (resultTypeClass) { + if (Ret.Ty != Type::VoidTy) { + unsigned DestClass = getClassB(Ret.Ty); + switch (DestClass) { case cByte: case cShort: case cInt: { @@ -674,32 +739,49 @@ void ISel::visitCallInst(CallInst &CI) { X86::MOVrr8, X86::MOVrr16, X86::MOVrr32 }; static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX }; - BuildMI(BB, regRegMove[resultTypeClass], 1, getReg(CI)) - .addReg(AReg[resultTypeClass]); + BuildMI(BB, regRegMove[DestClass], 1, Ret.Reg).addReg(AReg[DestClass]); break; } case cFP: // Floating-point return values live in %ST(0) - BuildMI(BB, X86::FpMOV, 1, getReg(CI)).addReg(X86::ST0); + BuildMI(BB, X86::FpGETRESULT, 1, Ret.Reg); break; - default: - std::cerr << "Cannot get return value for call of type '" - << *CI.getType() << "'\n"; - visitInstruction(CI); + case cLong: // Long values are left in EDX:EAX + BuildMI(BB, X86::MOVrr32, 1, Ret.Reg).addReg(X86::EAX); + BuildMI(BB, X86::MOVrr32, 1, Ret.Reg+1).addReg(X86::EDX); + break; + default: assert(0 && "Unknown class!"); } } } + +/// visitCallInst - Push args on stack and do a procedure call instruction. +void ISel::visitCallInst(CallInst &CI) { + MachineInstr *TheCall; + if (Function *F = CI.getCalledFunction()) { + // Emit a CALL instruction with PC-relative displacement. + TheCall = BuildMI(X86::CALLpcrel32, 1).addGlobalAddress(F, true); + } else { // Emit an indirect call... + unsigned Reg = getReg(CI.getCalledValue()); + TheCall = BuildMI(X86::CALLr32, 1).addReg(Reg); + } + + std::vector<ValueRecord> Args; + for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i) + Args.push_back(ValueRecord(getReg(CI.getOperand(i)), + CI.getOperand(i)->getType())); + + unsigned DestReg = CI.getType() != Type::VoidTy ? getReg(CI) : 0; + doCall(ValueRecord(DestReg, CI.getType()), TheCall, Args); +} + + /// visitSimpleBinary - Implement simple binary operators for integral types... 
/// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, /// 4 for Xor. /// void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) { - if (B.getType() == Type::BoolTy) // FIXME: Handle bools for logicals - visitInstruction(B); - - unsigned Class = getClass(B.getType()); - if (Class > cFP) // FIXME: Handle longs - visitInstruction(B); + unsigned Class = getClassB(B.getType()); static const unsigned OpcodeTab[][4] = { // Arithmetic operators @@ -711,28 +793,45 @@ void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) { { X86:: ORrr8, X86:: ORrr16, X86:: ORrr32, 0 }, // OR { X86::XORrr8, X86::XORrr16, X86::XORrr32, 0 }, // XOR }; + + bool isLong = false; + if (Class == cLong) { + isLong = true; + Class = cInt; // Bottom 32 bits are handled just like ints + } unsigned Opcode = OpcodeTab[OperatorClass][Class]; assert(Opcode && "Floating point arguments to logical inst?"); unsigned Op0r = getReg(B.getOperand(0)); unsigned Op1r = getReg(B.getOperand(1)); - BuildMI(BB, Opcode, 2, getReg(B)).addReg(Op0r).addReg(Op1r); + unsigned DestReg = getReg(B); + BuildMI(BB, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r); + + if (isLong) { // Handle the upper 32 bits of long values... + static const unsigned TopTab[] = { + X86::ADCrr32, X86::SBBrr32, X86::ANDrr32, X86::ORrr32, X86::XORrr32 + }; + BuildMI(BB, TopTab[OperatorClass], 2, + DestReg+1).addReg(Op0r+1).addReg(Op1r+1); + } } -/// doMultiply - Emit appropriate instructions to multiply together -/// the registers op0Reg and op1Reg, and put the result in destReg. -/// The type of the result should be given as resultType. +/// doMultiply - Emit appropriate instructions to multiply together the +/// registers op0Reg and op1Reg, and put the result in DestReg. The type of the +/// result should be given as DestTy. +/// +/// FIXME: doMultiply should use one of the two address IMUL instructions! 
+/// void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI, - unsigned destReg, const Type *resultType, + unsigned DestReg, const Type *DestTy, unsigned op0Reg, unsigned op1Reg) { - unsigned Class = getClass(resultType); + unsigned Class = getClass(DestTy); switch (Class) { case cFP: // Floating point multiply - BuildMI(BB, X86::FpMUL, 2, destReg).addReg(op0Reg).addReg(op1Reg); + BMI(BB, MBBI, X86::FpMUL, 2, DestReg).addReg(op0Reg).addReg(op1Reg); return; default: - case cLong: - assert(0 && "doMultiply not implemented for this class yet!"); + case cLong: assert(0 && "doMultiply cannot operate on LONG values!"); case cByte: case cShort: case cInt: // Small integerals, handled below... @@ -740,30 +839,58 @@ void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI, } static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX }; - static const unsigned MulOpcode[]={ X86::MULrr8, X86::MULrr16, X86::MULrr32 }; + static const unsigned MulOpcode[]={ X86::MULr8 , X86::MULr16 , X86::MULr32 }; static const unsigned MovOpcode[]={ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32 }; unsigned Reg = Regs[Class]; // Emit a MOV to put the first operand into the appropriately-sized // subreg of EAX. - BMI(MBB, MBBI, MovOpcode[Class], 1, Reg).addReg (op0Reg); + BMI(MBB, MBBI, MovOpcode[Class], 1, Reg).addReg(op0Reg); // Emit the appropriate multiply instruction. - BMI(MBB, MBBI, MulOpcode[Class], 1).addReg (op1Reg); + BMI(MBB, MBBI, MulOpcode[Class], 1).addReg(op1Reg); // Emit another MOV to put the result into the destination register. - BMI(MBB, MBBI, MovOpcode[Class], 1, destReg).addReg (Reg); + BMI(MBB, MBBI, MovOpcode[Class], 1, DestReg).addReg(Reg); } /// visitMul - Multiplies are not simple binary operators because they must deal /// with the EAX register explicitly. 
/// void ISel::visitMul(BinaryOperator &I) { - unsigned DestReg = getReg(I); unsigned Op0Reg = getReg(I.getOperand(0)); unsigned Op1Reg = getReg(I.getOperand(1)); - MachineBasicBlock::iterator MBBI = BB->end(); - doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg); + unsigned DestReg = getReg(I); + + // Simple scalar multiply? + if (I.getType() != Type::LongTy && I.getType() != Type::ULongTy) { + MachineBasicBlock::iterator MBBI = BB->end(); + doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg); + } else { + // Long value. We have to do things the hard way... + // Multiply the two low parts... capturing carry into EDX + BuildMI(BB, X86::MOVrr32, 1, X86::EAX).addReg(Op0Reg); + BuildMI(BB, X86::MULr32, 1).addReg(Op1Reg); // AL*BL + + unsigned OverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::MOVrr32, 1, DestReg).addReg(X86::EAX); // AL*BL + BuildMI(BB, X86::MOVrr32, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32 + + MachineBasicBlock::iterator MBBI = BB->end(); + unsigned AHBLReg = makeAnotherReg(Type::UIntTy); + doMultiply(BB, MBBI, AHBLReg, Type::UIntTy, Op0Reg+1, Op1Reg); // AH*BL + + unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::ADDrr32, 2, // AH*BL+(AL*BL >> 32) + AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); + + MBBI = BB->end(); + unsigned ALBHReg = makeAnotherReg(Type::UIntTy); + doMultiply(BB, MBBI, ALBHReg, Type::UIntTy, Op0Reg, Op1Reg+1); // AL*BH + + BuildMI(BB, X86::ADDrr32, 2, // AL*BH + AH*BL + (AL*BL >> 32) + DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); + } } @@ -779,19 +906,36 @@ void ISel::visitDivRem(BinaryOperator &I) { unsigned ResultReg = getReg(I); switch (Class) { - case cFP: // Floating point multiply + case cFP: // Floating point divide if (I.getOpcode() == Instruction::Div) BuildMI(BB, X86::FpDIV, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg); - else - BuildMI(BB, X86::FpREM, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg); + else { // Floating point 
remainder... + MachineInstr *TheCall = + BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("fmod", true); + std::vector<ValueRecord> Args; + Args.push_back(ValueRecord(Op0Reg, Type::DoubleTy)); + Args.push_back(ValueRecord(Op1Reg, Type::DoubleTy)); + doCall(ValueRecord(ResultReg, Type::DoubleTy), TheCall, Args); + } + return; + case cLong: { + static const char *FnName[] = + { "__moddi3", "__divdi3", "__umoddi3", "__udivdi3" }; + + unsigned NameIdx = I.getType()->isUnsigned()*2; + NameIdx += I.getOpcode() == Instruction::Div; + MachineInstr *TheCall = + BuildMI(X86::CALLpcrel32, 1).addExternalSymbol(FnName[NameIdx], true); + + std::vector<ValueRecord> Args; + Args.push_back(ValueRecord(Op0Reg, Type::LongTy)); + Args.push_back(ValueRecord(Op1Reg, Type::LongTy)); + doCall(ValueRecord(ResultReg, Type::LongTy), TheCall, Args); return; - default: - case cLong: - assert(0 && "div/rem not implemented for this class yet!"); - case cByte: - case cShort: - case cInt: // Small integerals, handled below... - break; + } + case cByte: case cShort: case cInt: + break; // Small integerals, handled below... + default: assert(0 && "Unknown class!"); } static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX }; @@ -801,8 +945,8 @@ void ISel::visitDivRem(BinaryOperator &I) { static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX }; static const unsigned DivOpcode[][4] = { - { X86::DIVrr8 , X86::DIVrr16 , X86::DIVrr32 , 0 }, // Unsigned division - { X86::IDIVrr8, X86::IDIVrr16, X86::IDIVrr32, 0 }, // Signed division + { X86::DIVr8 , X86::DIVr16 , X86::DIVr32 , 0 }, // Unsigned division + { X86::IDIVr8, X86::IDIVr16, X86::IDIVr32, 0 }, // Signed division }; bool isSigned = I.getType()->isSigned(); @@ -836,60 +980,149 @@ void ISel::visitDivRem(BinaryOperator &I) { /// shift values equal to 1. Even the general case is sort of special, /// because the shift amount has to be in CL, not just any old register. 
/// -void ISel::visitShiftInst (ShiftInst &I) { - unsigned Op0r = getReg (I.getOperand(0)); +void ISel::visitShiftInst(ShiftInst &I) { + unsigned SrcReg = getReg(I.getOperand(0)); unsigned DestReg = getReg(I); bool isLeftShift = I.getOpcode() == Instruction::Shl; - bool isOperandSigned = I.getType()->isU |