path: root/lib/Target/X86/InstSelectSimple.cpp
author     Chris Lattner <sabre@nondot.org>    2003-01-13 00:32:26 +0000
committer  Chris Lattner <sabre@nondot.org>    2003-01-13 00:32:26 +0000
commit     3e130a2a770d8a0bd968878d8fb0ffe7845fb606 (patch)
tree       3b7233e9c9e0a841a427a15dbcc358342417b81c /lib/Target/X86/InstSelectSimple.cpp
parent     019a7c801b491504868ad105016fc6ebbb09ff5d (diff)
* Adjust to use new interfaces, eliminating CurReg stuff
* Support arbitrary FP constants
* Fix bugs in frame layout for function calls and incoming arguments
* Insert copies for constant arguments to PHI nodes into the BOTTOM of predecessor blocks, not the top.
* Implement _floating point_ support: setcc, return, load, store, cast
* Fix several bugs in the cast instruction
* Abstract out call emission and load/store for FP
* Implement malloc/free without previous lowering pass.
* Make use of new forms of MachineOperand
* Implement _long_ support!
* Fix many bugs in FP support
* Change branch from je/jne to je/jmp
* Simplify code generated for GEP instructions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@5223 91177308-0d34-0410-b5e6-96231b3b80d8
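
Two of the changes above are easier to follow with a brief illustration. First, the new long-integer support relies on a register-pair convention (see makeAnotherReg in the diff): a 64-bit value occupies two consecutive 32-bit virtual registers, with Reg holding the low half and Reg+1 the high half. The sketch below shows how a long add is then lowered, mirroring visitSimpleBinary in this patch. It is illustrative only: the helper name is invented, and it assumes the same headers and BuildMI helpers that InstSelectSimple.cpp already uses.

// Sketch (not from the patch): lower a 64-bit add over the register-pair
// convention, low half in Reg, high half in Reg+1, as visitSimpleBinary
// does in the diff below. "emitLongAdd" is a hypothetical helper name.
static void emitLongAdd(MachineBasicBlock *BB,
                        unsigned Op0r, unsigned Op1r, unsigned DestReg) {
  // Add the low 32 bits; this sets the carry flag...
  BuildMI(BB, X86::ADDrr32, 2, DestReg).addReg(Op0r).addReg(Op1r);
  // ...which the add-with-carry on the high 32 bits then consumes.
  BuildMI(BB, X86::ADCrr32, 2, DestReg+1).addReg(Op0r+1).addReg(Op1r+1);
}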
Diffstat (limited to 'lib/Target/X86/InstSelectSimple.cpp')
-rw-r--r--  lib/Target/X86/InstSelectSimple.cpp | 1212
1 file changed, 802 insertions, 410 deletions
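
Second, the PHI-node change (copies for constant incoming values now go at the BOTTOM of each predecessor block) amounts to picking an insertion point just above the predecessor's terminators rather than at its top. A minimal sketch of that walk, lifted from the SelectPHINodes hunk further down; the helper name is invented, while isTerminatorInstr is the query the patch actually uses.

// Hypothetical helper: locate the point in a predecessor block just above
// its terminator instructions. SelectPHINodes now materializes incoming
// constants here instead of at the top of the block.
static MachineBasicBlock::iterator
findPHICopyInsertionPoint(MachineBasicBlock *Pred, const MachineInstrInfo &MII) {
  MachineBasicBlock::iterator PI = Pred->end();
  while (PI != Pred->begin() && MII.isTerminatorInstr((*(PI-1))->getOpcode()))
    --PI;            // Walk backwards over branches/returns...
  return PI;         // ...and insert the copies right before them.
}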
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index ad8501d798..43f25325e8 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -1,6 +1,6 @@
//===-- InstSelectSimple.cpp - A simple instruction selector for x86 ------===//
//
-// This file defines a simple peephole instruction selector for the x86 platform
+// This file defines a simple peephole instruction selector for the x86 target
//
//===----------------------------------------------------------------------===//
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Target/MRegisterInfo.h"
@@ -59,14 +60,12 @@ namespace {
MachineFunction *F; // The function we are compiling into
MachineBasicBlock *BB; // The current MBB we are compiling
- unsigned CurReg;
std::map<Value*, unsigned> RegMap; // Mapping between Val's and SSA Regs
// MBBMap - Mapping between LLVM BB -> Machine BB
std::map<const BasicBlock*, MachineBasicBlock*> MBBMap;
- ISel(TargetMachine &tm)
- : TM(tm), F(0), BB(0), CurReg(MRegisterInfo::FirstVirtualRegister) {}
+ ISel(TargetMachine &tm) : TM(tm), F(0), BB(0) {}
/// runOnFunction - Top level implementation of instruction selection for
/// the entire function.
@@ -89,7 +88,6 @@ namespace {
RegMap.clear();
MBBMap.clear();
- CurReg = MRegisterInfo::FirstVirtualRegister;
F = 0;
return false; // We never modify the LLVM itself.
}
@@ -125,6 +123,14 @@ namespace {
// Control flow operators
void visitReturnInst(ReturnInst &RI);
void visitBranchInst(BranchInst &BI);
+
+ struct ValueRecord {
+ unsigned Reg;
+ const Type *Ty;
+ ValueRecord(unsigned R, const Type *T) : Reg(R), Ty(T) {}
+ };
+ void doCall(const ValueRecord &Ret, MachineInstr *CallMI,
+ const std::vector<ValueRecord> &Args);
void visitCallInst(CallInst &I);
// Arithmetic operators
@@ -132,8 +138,8 @@ namespace {
void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); }
void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); }
void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI,
- unsigned destReg, const Type *resultType,
- unsigned op0Reg, unsigned op1Reg);
+ unsigned DestReg, const Type *DestTy,
+ unsigned Op0Reg, unsigned Op1Reg);
void visitMul(BinaryOperator &B);
void visitDiv(BinaryOperator &B) { visitDivRem(B); }
@@ -155,15 +161,16 @@ namespace {
void visitSetGE(SetCondInst &I) { visitSetCCInst(I, 5); }
// Memory Instructions
+ MachineInstr *doFPLoad(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const Type *Ty, unsigned DestReg);
void visitLoadInst(LoadInst &I);
+ void doFPStore(const Type *Ty, unsigned DestAddrReg, unsigned SrcReg);
void visitStoreInst(StoreInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
void visitAllocaInst(AllocaInst &I);
-
- // We assume that by this point, malloc instructions have been
- // lowered to calls, and dlsym will magically find malloc for us.
- void visitMallocInst(MallocInst &I) { visitInstruction (I); }
- void visitFreeInst(FreeInst &I) { visitInstruction(I); }
+ void visitMallocInst(MallocInst &I);
+ void visitFreeInst(FreeInst &I);
// Other operators
void visitShiftInst(ShiftInst &I);
@@ -176,11 +183,16 @@ namespace {
}
/// promote32 - Make a value 32-bits wide, and put it somewhere.
- void promote32 (const unsigned targetReg, Value *v);
+ ///
+ void promote32(unsigned targetReg, const ValueRecord &VR);
+
+ /// EmitByteSwap - Byteswap SrcReg into DestReg.
+ ///
+ void EmitByteSwap(unsigned DestReg, unsigned SrcReg, unsigned Class);
- // emitGEPOperation - Common code shared between visitGetElementPtrInst and
- // constant expression GEP support.
- //
+ /// emitGEPOperation - Common code shared between visitGetElementPtrInst and
+ /// constant expression GEP support.
+ ///
void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator&IP,
Value *Src, User::op_iterator IdxBegin,
User::op_iterator IdxEnd, unsigned TargetReg);
@@ -192,14 +204,28 @@ namespace {
MachineBasicBlock::iterator &MBBI,
Constant *C, unsigned Reg);
- /// makeAnotherReg - This method returns the next register number
- /// we haven't yet used.
+ /// makeAnotherReg - This method returns the next register number we haven't
+ /// yet used.
+ ///
+ /// Long values are handled somewhat specially. They are always allocated
+ /// as pairs of 32 bit integer values. The register number returned is the
+ /// lower 32 bits of the long value, and the regNum+1 is the upper 32 bits
+ /// of the long value.
+ ///
unsigned makeAnotherReg(const Type *Ty) {
+ if (Ty == Type::LongTy || Ty == Type::ULongTy) {
+ const TargetRegisterClass *RC =
+ TM.getRegisterInfo()->getRegClassForType(Type::IntTy);
+ // Create the lower part
+ F->getSSARegMap()->createVirtualRegister(RC);
+ // Create the upper part.
+ return F->getSSARegMap()->createVirtualRegister(RC)-1;
+ }
+
// Add the mapping of regnumber => reg class to MachineFunction
const TargetRegisterClass *RC =
TM.getRegisterInfo()->getRegClassForType(Ty);
- F->getSSARegMap()->addRegMap(CurReg, RC);
- return CurReg++;
+ return F->getSSARegMap()->createVirtualRegister(RC);
}
/// getReg - This method turns an LLVM value into a register number. This
@@ -228,7 +254,7 @@ namespace {
RegMap.erase(V); // Assign a new name to this constant if ref'd again
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// Move the address of the global into the register
- BMI(MBB, IPt, X86::MOVir32, 1, Reg).addReg(GV);
+ BMI(MBB, IPt, X86::MOVir32, 1, Reg).addGlobalAddress(GV);
RegMap.erase(V); // Assign a new name to this address if ref'd again
}
@@ -259,9 +285,9 @@ static inline TypeClass getClass(const Type *Ty) {
case Type::FloatTyID:
case Type::DoubleTyID: return cFP; // Floating Point is #3
+
case Type::LongTyID:
- case Type::ULongTyID: //return cLong; // Longs are class #3
- return cInt; // FIXME: LONGS ARE TREATED AS INTS!
+ case Type::ULongTyID: return cLong; // Longs are class #4
default:
assert(0 && "Invalid type to getClass!");
return cByte; // not reached
@@ -294,6 +320,20 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
if (C->getType()->isIntegral()) {
unsigned Class = getClassB(C->getType());
+
+ if (Class == cLong) {
+ // Copy the value into the register pair.
+ uint64_t Val;
+ if (C->getType()->isSigned())
+ Val = cast<ConstantSInt>(C)->getValue();
+ else
+ Val = cast<ConstantUInt>(C)->getValue();
+
+ BMI(MBB, IP, X86::MOVir32, 1, R).addZImm(Val & 0xFFFFFFFF);
+ BMI(MBB, IP, X86::MOVir32, 1, R+1).addZImm(Val >> 32);
+ return;
+ }
+
assert(Class <= cInt && "Type not handled yet!");
static const unsigned IntegralOpcodeTab[] = {
@@ -304,7 +344,7 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
BMI(MBB, IP, X86::MOVir8, 1, R).addZImm(C == ConstantBool::True);
} else if (C->getType()->isSigned()) {
ConstantSInt *CSI = cast<ConstantSInt>(C);
- BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addSImm(CSI->getValue());
+ BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addZImm(CSI->getValue());
} else {
ConstantUInt *CUI = cast<ConstantUInt>(C);
BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addZImm(CUI->getValue());
@@ -316,8 +356,10 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
else if (Value == +1.0)
BMI(MBB, IP, X86::FLD1, 0, R);
else {
- std::cerr << "Cannot load constant '" << Value << "'!\n";
- assert(0);
+ // Otherwise we need to spill the constant to memory...
+ MachineConstantPool *CP = F->getConstantPool();
+ unsigned CPI = CP->getConstantPoolIndex(CFP);
+ addConstantPoolReference(doFPLoad(MBB, IP, CFP->getType(), R), CPI);
}
} else if (isa<ConstantPointerNull>(C)) {
@@ -340,19 +382,17 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
// X86, the stack frame looks like this:
//
// [ESP] -- return address
- // [ESP + 4] -- first argument (leftmost lexically) if four bytes in size
- // [ESP + 8] -- second argument, if four bytes in size
+ // [ESP + 4] -- first argument (leftmost lexically)
+ // [ESP + 8] -- second argument, if first argument is four bytes in size
// ...
//
- unsigned ArgOffset = 0;
+ unsigned ArgOffset = 4;
MachineFrameInfo *MFI = F->getFrameInfo();
for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) {
unsigned Reg = getReg(*I);
- ArgOffset += 4; // Each argument takes at least 4 bytes on the stack...
int FI; // Frame object index
-
switch (getClassB(I->getType())) {
case cByte:
FI = MFI->CreateFixedObject(1, ArgOffset);
@@ -366,6 +406,12 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
FI = MFI->CreateFixedObject(4, ArgOffset);
addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg), FI);
break;
+ case cLong:
+ FI = MFI->CreateFixedObject(8, ArgOffset);
+ addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg), FI);
+ addFrameReference(BuildMI(BB, X86::MOVmr32, 4, Reg+1), FI, 4);
+ ArgOffset += 4; // longs require 4 additional bytes
+ break;
case cFP:
unsigned Opcode;
if (I->getType() == Type::FloatTy) {
@@ -373,14 +419,15 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
FI = MFI->CreateFixedObject(4, ArgOffset);
} else {
Opcode = X86::FLDr64;
- ArgOffset += 4; // doubles require 4 additional bytes
FI = MFI->CreateFixedObject(8, ArgOffset);
+ ArgOffset += 4; // doubles require 4 additional bytes
}
addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI);
break;
default:
assert(0 && "Unhandled argument type!");
}
+ ArgOffset += 4; // Each argument takes at least 4 bytes on the stack...
}
}
@@ -390,6 +437,7 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
/// the current one.
///
void ISel::SelectPHINodes() {
+ const MachineInstrInfo &MII = TM.getInstrInfo();
const Function &LF = *F->getFunction(); // The LLVM function...
for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) {
const BasicBlock *BB = I;
@@ -399,9 +447,17 @@ void ISel::SelectPHINodes() {
unsigned NumPHIs = 0;
for (BasicBlock::const_iterator I = BB->begin();
PHINode *PN = (PHINode*)dyn_cast<PHINode>(&*I); ++I) {
+
// Create a new machine instr PHI node, and insert it.
- MachineInstr *MI = BuildMI(X86::PHI, PN->getNumOperands(), getReg(*PN));
- MBB->insert(MBB->begin()+NumPHIs++, MI); // Insert it at the top of the BB
+ unsigned PHIReg = getReg(*PN);
+ MachineInstr *PhiMI = BuildMI(X86::PHI, PN->getNumOperands(), PHIReg);
+ MBB->insert(MBB->begin()+NumPHIs++, PhiMI);
+
+ MachineInstr *LongPhiMI = 0;
+ if (PN->getType() == Type::LongTy || PN->getType() == Type::ULongTy) {
+ LongPhiMI = BuildMI(X86::PHI, PN->getNumOperands(), PHIReg+1);
+ MBB->insert(MBB->begin()+NumPHIs++, LongPhiMI);
+ }
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
MachineBasicBlock *PredMBB = MBBMap[PN->getIncomingBlock(i)];
@@ -410,13 +466,17 @@ void ISel::SelectPHINodes() {
// available in a virtual register, insert the computation code into
// PredMBB
//
- // FIXME: This should insert the code into the BOTTOM of the block, not
- // the top of the block. This just makes for huge live ranges...
- MachineBasicBlock::iterator PI = PredMBB->begin();
- while ((*PI)->getOpcode() == X86::PHI) ++PI;
-
- MI->addRegOperand(getReg(PN->getIncomingValue(i), PredMBB, PI));
- MI->addMachineBasicBlockOperand(PredMBB);
+ MachineBasicBlock::iterator PI = PredMBB->end();
+ while (PI != PredMBB->begin() &&
+ MII.isTerminatorInstr((*(PI-1))->getOpcode()))
+ --PI;
+ unsigned ValReg = getReg(PN->getIncomingValue(i), PredMBB, PI);
+ PhiMI->addRegOperand(ValReg);
+ PhiMI->addMachineBasicBlockOperand(PredMBB);
+ if (LongPhiMI) {
+ LongPhiMI->addRegOperand(ValReg+1);
+ LongPhiMI->addMachineBasicBlockOperand(PredMBB);
+ }
}
}
}
@@ -426,102 +486,108 @@ void ISel::SelectPHINodes() {
/// SetCC instructions - Here we just emit boilerplate code to set a byte-sized
/// register, then move it to wherever the result should be.
-/// We handle FP setcc instructions by pushing them, doing a
-/// compare-and-pop-twice, and then copying the concodes to the main
-/// processor's concodes (I didn't make this up, it's in the Intel manual)
///
void ISel::visitSetCCInst(SetCondInst &I, unsigned OpNum) {
// The arguments are already supposed to be of the same type.
const Type *CompTy = I.getOperand(0)->getType();
+ bool isSigned = CompTy->isSigned();
unsigned reg1 = getReg(I.getOperand(0));
unsigned reg2 = getReg(I.getOperand(1));
+ unsigned DestReg = getReg(I);
+
+ // LLVM -> X86 signed X86 unsigned
+ // ----- ---------- ------------
+ // seteq -> sete sete
+ // setne -> setne setne
+ // setlt -> setl setb
+ // setgt -> setg seta
+ // setle -> setle setbe
+ // setge -> setge setae
+ static const unsigned OpcodeTab[2][6] = {
+ {X86::SETEr, X86::SETNEr, X86::SETBr, X86::SETAr, X86::SETBEr, X86::SETAEr},
+ {X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGr, X86::SETLEr, X86::SETGEr},
+ };
- unsigned Class = getClass(CompTy);
+ unsigned Class = getClassB(CompTy);
switch (Class) {
+ default: assert(0 && "Unknown type class!");
// Emit: cmp <var1>, <var2> (do the comparison). We can
// compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with
// 32-bit.
case cByte:
- BuildMI (BB, X86::CMPrr8, 2).addReg (reg1).addReg (reg2);
+ BuildMI(BB, X86::CMPrr8, 2).addReg(reg1).addReg(reg2);
break;
case cShort:
- BuildMI (BB, X86::CMPrr16, 2).addReg (reg1).addReg (reg2);
+ BuildMI(BB, X86::CMPrr16, 2).addReg(reg1).addReg(reg2);
break;
case cInt:
- BuildMI (BB, X86::CMPrr32, 2).addReg (reg1).addReg (reg2);
- break;
-
-#if 0
- // Push the variables on the stack with fldl opcodes.
- // FIXME: assuming var1, var2 are in memory, if not, spill to
- // stack first
- case cFP: // Floats
- BuildMI (BB, X86::FLDr32, 1).addReg (reg1);
- BuildMI (BB, X86::FLDr32, 1).addReg (reg2);
+ BuildMI(BB, X86::CMPrr32, 2).addReg(reg1).addReg(reg2);
break;
- case cFP (doubles): // Doubles
- BuildMI (BB, X86::FLDr64, 1).addReg (reg1);
- BuildMI (BB, X86::FLDr64, 1).addReg (reg2);
+ case cFP:
+ BuildMI(BB, X86::FpUCOM, 2).addReg(reg1).addReg(reg2);
+ BuildMI(BB, X86::FNSTSWr8, 0);
+ BuildMI(BB, X86::SAHF, 1);
+ isSigned = false; // Compare with unsigned operators
break;
-#endif
+
case cLong:
- default:
- visitInstruction(I);
- }
+ if (OpNum < 2) { // seteq, setne
+ unsigned LoTmp = makeAnotherReg(Type::IntTy);
+ unsigned HiTmp = makeAnotherReg(Type::IntTy);
+ unsigned FinalTmp = makeAnotherReg(Type::IntTy);
+ BuildMI(BB, X86::XORrr32, 2, LoTmp).addReg(reg1).addReg(reg2);
+ BuildMI(BB, X86::XORrr32, 2, HiTmp).addReg(reg1+1).addReg(reg2+1);
+ BuildMI(BB, X86::ORrr32, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
+ break; // Allow the sete or setne to be generated from flags set by OR
+ } else {
+ // Emit a sequence of code which compares the high and low parts once
+ // each, then uses a conditional move to handle the overflow case. For
+ // example, a setlt for long would generate code like this:
+ //
+ // AL = lo(op1) < lo(op2) // Signedness depends on operands
+ // BL = hi(op1) < hi(op2) // Always unsigned comparison
+ // dest = hi(op1) == hi(op2) ? AL : BL;
+ //
-#if 0
- if (CompTy->isFloatingPoint()) {
- // (Non-trapping) compare and pop twice.
- BuildMI (BB, X86::FUCOMPP, 0);
- // Move fp status word (concodes) to ax.
- BuildMI (BB, X86::FNSTSWr8, 1, X86::AX);
- // Load real concodes from ax.
- BuildMI (BB, X86::SAHF, 1).addReg(X86::AH);
+ // FIXME: This would be much better if we had hierarchical register
+ // classes! Until then, hardcode registers so that we can deal with their
+ // aliases (because we don't have conditional byte moves).
+ //
+ BuildMI(BB, X86::CMPrr32, 2).addReg(reg1).addReg(reg2);
+ BuildMI(BB, OpcodeTab[0][OpNum], 0, X86::AL);
+ BuildMI(BB, X86::CMPrr32, 2).addReg(reg1+1).addReg(reg2+1);
+ BuildMI(BB, OpcodeTab[isSigned][OpNum], 0, X86::BL);
+ BuildMI(BB, X86::CMOVErr16, 2, X86::BX).addReg(X86::BX).addReg(X86::AX);
+ BuildMI(BB, X86::MOVrr8, 1, DestReg).addReg(X86::BL);
+ return;
+ }
}
-#endif
- // Emit setOp instruction (extract concode; clobbers ax),
- // using the following mapping:
- // LLVM -> X86 signed X86 unsigned
- // ----- ----- -----
- // seteq -> sete sete
- // setne -> setne setne
- // setlt -> setl setb
- // setgt -> setg seta
- // setle -> setle setbe
- // setge -> setge setae
-
- static const unsigned OpcodeTab[2][6] = {
- {X86::SETEr, X86::SETNEr, X86::SETBr, X86::SETAr, X86::SETBEr, X86::SETAEr},
- {X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGr, X86::SETLEr, X86::SETGEr},
- };
-
- BuildMI(BB, OpcodeTab[CompTy->isSigned()][OpNum], 0, getReg(I));
+ BuildMI(BB, OpcodeTab[isSigned][OpNum], 0, DestReg);
}
/// promote32 - Emit instructions to turn a narrow operand into a 32-bit-wide
/// operand, in the specified target register.
-void ISel::promote32 (unsigned targetReg, Value *v) {
- unsigned vReg = getReg(v);
- bool isUnsigned = v->getType()->isUnsigned();
- switch (getClass(v->getType())) {
+void ISel::promote32(unsigned targetReg, const ValueRecord &VR) {
+ bool isUnsigned = VR.Ty->isUnsigned();
+ switch (getClassB(VR.Ty)) {
case cByte:
// Extend value into target register (8->32)
if (isUnsigned)
- BuildMI(BB, X86::MOVZXr32r8, 1, targetReg).addReg(vReg);
+ BuildMI(BB, X86::MOVZXr32r8, 1, targetReg).addReg(VR.Reg);
else
- BuildMI(BB, X86::MOVSXr32r8, 1, targetReg).addReg(vReg);
+ BuildMI(BB, X86::MOVSXr32r8, 1, targetReg).addReg(VR.Reg);
break;
case cShort:
// Extend value into target register (16->32)
if (isUnsigned)
- BuildMI(BB, X86::MOVZXr32r16, 1, targetReg).addReg(vReg);
+ BuildMI(BB, X86::MOVZXr32r16, 1, targetReg).addReg(VR.Reg);
else
- BuildMI(BB, X86::MOVSXr32r16, 1, targetReg).addReg(vReg);
+ BuildMI(BB, X86::MOVSXr32r16, 1, targetReg).addReg(VR.Reg);
break;
case cInt:
// Move value into target register (32->32)
- BuildMI(BB, X86::MOVrr32, 1, targetReg).addReg(vReg);
+ BuildMI(BB, X86::MOVrr32, 1, targetReg).addReg(VR.Reg);
break;
default:
assert(0 && "Unpromotable operand class in promote32");
@@ -539,27 +605,29 @@ void ISel::promote32 (unsigned targetReg, Value *v) {
/// ret long, ulong : Move value into EAX/EDX and return
/// ret float/double : Top of FP stack
///
-void ISel::visitReturnInst (ReturnInst &I) {
+void ISel::visitReturnInst(ReturnInst &I) {
if (I.getNumOperands() == 0) {
BuildMI(BB, X86::RET, 0); // Just emit a 'ret' instruction
return;
}
Value *RetVal = I.getOperand(0);
- switch (getClass(RetVal->getType())) {
+ unsigned RetReg = getReg(RetVal);
+ switch (getClassB(RetVal->getType())) {
case cByte: // integral return values: extend or move into EAX and return
case cShort:
case cInt:
- promote32(X86::EAX, RetVal);
+ promote32(X86::EAX, ValueRecord(RetReg, RetVal->getType()));
break;
case cFP: // Floats & Doubles: Return in ST(0)
- BuildMI(BB, X86::FpMOV, 1, X86::ST0).addReg(getReg(RetVal));
+ BuildMI(BB, X86::FpSETRESULT, 1).addReg(RetReg);
break;
case cLong:
- // ret long: use EAX(least significant 32 bits)/EDX (most
- // significant 32)...
+ BuildMI(BB, X86::MOVrr32, 1, X86::EAX).addReg(RetReg);
+ BuildMI(BB, X86::MOVrr32, 1, X86::EDX).addReg(RetReg+1);
+ break;
default:
- visitInstruction (I);
+ visitInstruction(I);
}
// Emit a 'ret' instruction
BuildMI(BB, X86::RET, 0);
@@ -572,36 +640,33 @@ void ISel::visitReturnInst (ReturnInst &I) {
///
void ISel::visitBranchInst(BranchInst &BI) {
if (BI.isConditional()) {
- BasicBlock *ifTrue = BI.getSuccessor(0);
- BasicBlock *ifFalse = BI.getSuccessor(1);
-
- // Compare condition with zero, followed by jump-if-equal to ifFalse, and
- // jump-if-nonequal to ifTrue
unsigned condReg = getReg(BI.getCondition());
BuildMI(BB, X86::CMPri8, 2).addReg(condReg).addZImm(0);
- BuildMI(BB, X86::JNE, 1).addPCDisp(BI.getSuccessor(0));
BuildMI(BB, X86::JE, 1).addPCDisp(BI.getSuccessor(1));
- } else { // unconditional branch
- BuildMI(BB, X86::JMP, 1).addPCDisp(BI.getSuccessor(0));
}
+ BuildMI(BB, X86::JMP, 1).addPCDisp(BI.getSuccessor(0));
}
-/// visitCallInst - Push args on stack and do a procedure call instruction.
-void ISel::visitCallInst(CallInst &CI) {
+
+/// doCall - This emits an abstract call instruction, setting up the arguments
+/// and the return value as appropriate. For the actual function call itself,
+/// it inserts the specified CallMI instruction into the stream.
+///
+void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
+ const std::vector<ValueRecord> &Args) {
+
// Count how many bytes are to be pushed on the stack...
unsigned NumBytes = 0;
- if (CI.getNumOperands() > 1) {
- for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
- switch (getClass(CI.getOperand(i)->getType())) {
+ if (!Args.empty()) {
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
+ switch (getClassB(Args[i].Ty)) {
case cByte: case cShort: case cInt:
- NumBytes += 4;
- break;
+ NumBytes += 4; break;
case cLong:
- NumBytes += 8;
- break;
+ NumBytes += 8; break;
case cFP:
- NumBytes += CI.getOperand(i)->getType() == Type::FloatTy ? 4 : 8;
+ NumBytes += Args[i].Ty == Type::FloatTy ? 4 : 8;
break;
default: assert(0 && "Unknown class!");
}
@@ -611,60 +676,60 @@ void ISel::visitCallInst(CallInst &CI) {
// Arguments go on the stack in reverse order, as specified by the ABI.
unsigned ArgOffset = 0;
- for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i) {
- Value *Arg = CI.getOperand(i);
- switch (getClass(Arg->getType())) {
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ unsigned ArgReg = Args[i].Reg;
+ switch (getClassB(Args[i].Ty)) {
case cByte:
case cShort: {
// Promote arg to 32 bits wide into a temporary register...
unsigned R = makeAnotherReg(Type::UIntTy);
- promote32(R, Arg);
+ promote32(R, Args[i]);
addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
X86::ESP, ArgOffset).addReg(R);
break;
}
case cInt:
addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
- X86::ESP, ArgOffset).addReg(getReg(Arg));
+ X86::ESP, ArgOffset).addReg(ArgReg);
break;
-
+ case cLong:
+ addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
+ X86::ESP, ArgOffset).addReg(ArgReg);
+ addRegOffset(BuildMI(BB, X86::MOVrm32, 5),
+ X86::ESP, ArgOffset+4).addReg(ArgReg+1);
+ ArgOffset += 4; // 8 byte entry, not 4.
+ break;
+
case cFP:
- if (Arg->getType() == Type::FloatTy) {
+ if (Args[i].Ty == Type::FloatTy) {
addRegOffset(BuildMI(BB, X86::FSTr32, 5),
- X86::ESP, ArgOffset).addReg(getReg(Arg));
+ X86::ESP, ArgOffset).addReg(ArgReg);
} else {
- assert(Arg->getType() == Type::DoubleTy && "Unknown FP type!");
- ArgOffset += 4;
- addRegOffset(BuildMI(BB, X86::FSTr32, 5),
- X86::ESP, ArgOffset).addReg(getReg(Arg));
+ assert(Args[i].Ty == Type::DoubleTy && "Unknown FP type!");
+ addRegOffset(BuildMI(BB, X86::FSTr64, 5),
+ X86::ESP, ArgOffset).addReg(ArgReg);
+ ArgOffset += 4; // 8 byte entry, not 4.
}
break;
- default:
- // FIXME: long/ulong/float/double args not handled.
- visitInstruction(CI);
- break;
+ default: assert(0 && "Unknown class!");
}
ArgOffset += 4;
}
- }
-
- if (Function *F = CI.getCalledFunction()) {
- // Emit a CALL instruction with PC-relative displacement.
- BuildMI(BB, X86::CALLpcrel32, 1).addPCDisp(F);
} else {
- unsigned Reg = getReg(CI.getCalledValue());
- BuildMI(BB, X86::CALLr32, 1).addReg(Reg);
+ BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addZImm(0);
}
+ BB->push_back(CallMI);
+
BuildMI(BB, X86::ADJCALLSTACKUP, 1).addZImm(NumBytes);
// If there is a return value, scavenge the result from the location the call
// leaves it in...
//
- if (CI.getType() != Type::VoidTy) {
- unsigned resultTypeClass = getClass(CI.getType());
- switch (resultTypeClass) {
+ if (Ret.Ty != Type::VoidTy) {
+ unsigned DestClass = getClassB(Ret.Ty);
+ switch (DestClass) {
case cByte:
case cShort:
case cInt: {
@@ -674,32 +739,49 @@ void ISel::visitCallInst(CallInst &CI) {
X86::MOVrr8, X86::MOVrr16, X86::MOVrr32
};
static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX };
- BuildMI(BB, regRegMove[resultTypeClass], 1, getReg(CI))
- .addReg(AReg[resultTypeClass]);
+ BuildMI(BB, regRegMove[DestClass], 1, Ret.Reg).addReg(AReg[DestClass]);
break;
}
case cFP: // Floating-point return values live in %ST(0)
- BuildMI(BB, X86::FpMOV, 1, getReg(CI)).addReg(X86::ST0);
+ BuildMI(BB, X86::FpGETRESULT, 1, Ret.Reg);
break;
- default:
- std::cerr << "Cannot get return value for call of type '"
- << *CI.getType() << "'\n";
- visitInstruction(CI);
+ case cLong: // Long values are left in EDX:EAX
+ BuildMI(BB, X86::MOVrr32, 1, Ret.Reg).addReg(X86::EAX);
+ BuildMI(BB, X86::MOVrr32, 1, Ret.Reg+1).addReg(X86::EDX);
+ break;
+ default: assert(0 && "Unknown class!");
}
}
}
+
+/// visitCallInst - Push args on stack and do a procedure call instruction.
+void ISel::visitCallInst(CallInst &CI) {
+ MachineInstr *TheCall;
+ if (Function *F = CI.getCalledFunction()) {
+ // Emit a CALL instruction with PC-relative displacement.
+ TheCall = BuildMI(X86::CALLpcrel32, 1).addGlobalAddress(F, true);
+ } else { // Emit an indirect call...
+ unsigned Reg = getReg(CI.getCalledValue());
+ TheCall = BuildMI(X86::CALLr32, 1).addReg(Reg);
+ }
+
+ std::vector<ValueRecord> Args;
+ for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
+ Args.push_back(ValueRecord(getReg(CI.getOperand(i)),
+ CI.getOperand(i)->getType()));
+
+ unsigned DestReg = CI.getType() != Type::VoidTy ? getReg(CI) : 0;
+ doCall(ValueRecord(DestReg, CI.getType()), TheCall, Args);
+}
+
+
/// visitSimpleBinary - Implement simple binary operators for integral types...
/// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or,
/// 4 for Xor.
///
void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
- if (B.getType() == Type::BoolTy) // FIXME: Handle bools for logicals
- visitInstruction(B);
-
- unsigned Class = getClass(B.getType());
- if (Class > cFP) // FIXME: Handle longs
- visitInstruction(B);
+ unsigned Class = getClassB(B.getType());
static const unsigned OpcodeTab[][4] = {
// Arithmetic operators
@@ -711,28 +793,45 @@ void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
{ X86:: ORrr8, X86:: ORrr16, X86:: ORrr32, 0 }, // OR
{ X86::XORrr8, X86::XORrr16, X86::XORrr32, 0 }, // XOR
};
+
+ bool isLong = false;
+ if (Class == cLong) {
+ isLong = true;
+ Class = cInt; // Bottom 32 bits are handled just like ints
+ }
unsigned Opcode = OpcodeTab[OperatorClass][Class];
assert(Opcode && "Floating point arguments to logical inst?");
unsigned Op0r = getReg(B.getOperand(0));
unsigned Op1r = getReg(B.getOperand(1));
- BuildMI(BB, Opcode, 2, getReg(B)).addReg(Op0r).addReg(Op1r);
+ unsigned DestReg = getReg(B);
+ BuildMI(BB, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
+
+ if (isLong) { // Handle the upper 32 bits of long values...
+ static const unsigned TopTab[] = {
+ X86::ADCrr32, X86::SBBrr32, X86::ANDrr32, X86::ORrr32, X86::XORrr32
+ };
+ BuildMI(BB, TopTab[OperatorClass], 2,
+ DestReg+1).addReg(Op0r+1).addReg(Op1r+1);
+ }
}
-/// doMultiply - Emit appropriate instructions to multiply together
-/// the registers op0Reg and op1Reg, and put the result in destReg.
-/// The type of the result should be given as resultType.
+/// doMultiply - Emit appropriate instructions to multiply together the
+/// registers op0Reg and op1Reg, and put the result in DestReg. The type of the
+/// result should be given as DestTy.
+///
+/// FIXME: doMultiply should use one of the two address IMUL instructions!
+///
void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI,
- unsigned destReg, const Type *resultType,
+ unsigned DestReg, const Type *DestTy,
unsigned op0Reg, unsigned op1Reg) {
- unsigned Class = getClass(resultType);
+ unsigned Class = getClass(DestTy);
switch (Class) {
case cFP: // Floating point multiply
- BuildMI(BB, X86::FpMUL, 2, destReg).addReg(op0Reg).addReg(op1Reg);
+ BMI(BB, MBBI, X86::FpMUL, 2, DestReg).addReg(op0Reg).addReg(op1Reg);
return;
default:
- case cLong:
- assert(0 && "doMultiply not implemented for this class yet!");
+ case cLong: assert(0 && "doMultiply cannot operate on LONG values!");
case cByte:
case cShort:
case cInt: // Small integerals, handled below...
@@ -740,30 +839,58 @@ void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI,
}
static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
- static const unsigned MulOpcode[]={ X86::MULrr8, X86::MULrr16, X86::MULrr32 };
+ static const unsigned MulOpcode[]={ X86::MULr8 , X86::MULr16 , X86::MULr32 };
static const unsigned MovOpcode[]={ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32 };
unsigned Reg = Regs[Class];
// Emit a MOV to put the first operand into the appropriately-sized
// subreg of EAX.
- BMI(MBB, MBBI, MovOpcode[Class], 1, Reg).addReg (op0Reg);
+ BMI(MBB, MBBI, MovOpcode[Class], 1, Reg).addReg(op0Reg);
// Emit the appropriate multiply instruction.
- BMI(MBB, MBBI, MulOpcode[Class], 1).addReg (op1Reg);
+ BMI(MBB, MBBI, MulOpcode[Class], 1).addReg(op1Reg);
// Emit another MOV to put the result into the destination register.
- BMI(MBB, MBBI, MovOpcode[Class], 1, destReg).addReg (Reg);
+ BMI(MBB, MBBI, MovOpcode[Class], 1, DestReg).addReg(Reg);
}
/// visitMul - Multiplies are not simple binary operators because they must deal
/// with the EAX register explicitly.
///
void ISel::visitMul(BinaryOperator &I) {
- unsigned DestReg = getReg(I);
unsigned Op0Reg = getReg(I.getOperand(0));
unsigned Op1Reg = getReg(I.getOperand(1));
- MachineBasicBlock::iterator MBBI = BB->end();
- doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg);
+ unsigned DestReg = getReg(I);
+
+ // Simple scalar multiply?
+ if (I.getType() != Type::LongTy && I.getType() != Type::ULongTy) {
+ MachineBasicBlock::iterator MBBI = BB->end();
+ doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg);
+ } else {
+ // Long value. We have to do things the hard way...
+ // Multiply the two low parts... capturing carry into EDX
+ BuildMI(BB, X86::MOVrr32, 1, X86::EAX).addReg(Op0Reg);
+ BuildMI(BB, X86::MULr32, 1).addReg(Op1Reg); // AL*BL
+
+ unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, X86::MOVrr32, 1, DestReg).addReg(X86::EAX); // AL*BL
+ BuildMI(BB, X86::MOVrr32, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
+
+ MachineBasicBlock::iterator MBBI = BB->end();
+ unsigned AHBLReg = makeAnotherReg(Type::UIntTy);
+ doMultiply(BB, MBBI, AHBLReg, Type::UIntTy, Op0Reg+1, Op1Reg); // AH*BL
+
+ unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, X86::ADDrr32, 2, // AH*BL+(AL*BL >> 32)
+ AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
+
+ MBBI = BB->end();
+ unsigned ALBHReg = makeAnotherReg(Type::UIntTy);
+ doMultiply(BB, MBBI, ALBHReg, Type::UIntTy, Op0Reg, Op1Reg+1); // AL*BH
+
+ BuildMI(BB, X86::ADDrr32, 2, // AL*BH + AH*BL + (AL*BL >> 32)
+ DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
+ }
}
@@ -779,19 +906,36 @@ void ISel::visitDivRem(BinaryOperator &I) {
unsigned ResultReg = getReg(I);
switch (Class) {
- case cFP: // Floating point multiply
+ case cFP: // Floating point divide
if (I.getOpcode() == Instruction::Div)
BuildMI(BB, X86::FpDIV, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
- else
- BuildMI(BB, X86::FpREM, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
+ else { // Floating point remainder...
+ MachineInstr *TheCall =
+ BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("fmod", true);
+ std::vector<ValueRecord> Args;
+ Args.push_back(ValueRecord(Op0Reg, Type::DoubleTy));
+ Args.push_back(ValueRecord(Op1Reg, Type::DoubleTy));
+ doCall(ValueRecord(ResultReg, Type::DoubleTy), TheCall, Args);
+ }
+ return;
+ case cLong: {
+ static const char *FnName[] =
+ { "__moddi3", "__divdi3", "__umoddi3", "__udivdi3" };
+
+ unsigned NameIdx = I.getType()->isUnsigned()*2;
+ NameIdx += I.getOpcode() == Instruction::Div;
+ MachineInstr *TheCall =
+ BuildMI(X86::CALLpcrel32, 1).addExternalSymbol(FnName[NameIdx], true);
+
+ std::vector<ValueRecord> Args;
+ Args.push_back(ValueRecord(Op0Reg, Type::LongTy));
+ Args.push_back(ValueRecord(Op1Reg, Type::LongTy));
+ doCall(ValueRecord(ResultReg, Type::LongTy), TheCall, Args);
return;
- default:
- case cLong:
- assert(0 && "div/rem not implemented for this class yet!");
- case cByte:
- case cShort:
- case cInt: // Small integerals, handled below...
- break;
+ }
+ case cByte: case cShort: case cInt:
+ break; // Small integerals, handled below...
+ default: assert(0 && "Unknown class!");
}
static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
@@ -801,8 +945,8 @@ void ISel::visitDivRem(BinaryOperator &I) {
static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX };
static const unsigned DivOpcode[][4] = {
- { X86::DIVrr8 , X86::DIVrr16 , X86::DIVrr32 , 0 }, // Unsigned division
- { X86::IDIVrr8, X86::IDIVrr16, X86::IDIVrr32, 0 }, // Signed division
+ { X86::DIVr8 , X86::DIVr16 , X86::DIVr32 , 0 }, // Unsigned division
+ { X86::IDIVr8, X86::IDIVr16, X86::IDIVr32, 0 }, // Signed division
};
bool isSigned = I.getType()->isSigned();
@@ -836,60 +980,149 @@ void ISel::visitDivRem(BinaryOperator &I) {
/// shift values equal to 1. Even the general case is sort of special,
/// because the shift amount has to be in CL, not just any old register.
///
-void ISel::visitShiftInst (ShiftInst &I) {
- unsigned Op0r = getReg (I.getOperand(0));
+void ISel::visitShiftInst(ShiftInst &I) {
+ unsigned SrcReg = getReg(I.getOperand(0));
unsigned DestReg = getReg(I);
bool isLeftShift = I.getOpcode() == Instruction::Shl;
- bool isOperandSigned = I.getType()->isU