diff options
author | Chris Lattner <sabre@nondot.org> | 2004-04-06 17:34:50 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2004-04-06 17:34:50 +0000 |
commit | 48c937e5c9808cc63d2cd04d823642d875c6bf0a (patch) | |
tree | bf8daa41d5ec17f148e59969c0f2d8e467566ee0 /lib | |
parent | b04da8a3c6488fe396c6d61eec9980598b8c84ae (diff) |
Fix a minor bug in previous checking
Enable folding of long seteq/setne comparisons into branches and select instructions
Implement unfolded long relational comparisons against a constants a bit more efficiently
Folding comparisons changes code that looks like this:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
mov %ECX, %EAX
or %ECX, %EDX
sete %CL
test %CL, %CL
je .LBB2 # PC rel: F
into code that looks like this:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
mov %ECX, %EAX
or %ECX, %EDX
jne .LBB2 # PC rel: F
This speeds up 186.crafty by 6% with llc-ls.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@12702 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/X86/InstSelectSimple.cpp | 31 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelSimple.cpp | 31 |
2 files changed, 58 insertions, 4 deletions
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp index 527bd1b135..05d2d887d1 100644 --- a/lib/Target/X86/InstSelectSimple.cpp +++ b/lib/Target/X86/InstSelectSimple.cpp @@ -756,7 +756,9 @@ static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) { Instruction *User = cast<Instruction>(SCI->use_back()); if ((isa<BranchInst>(User) || isa<SelectInst>(User)) && SCI->getParent() == User->getParent() && - getClassB(SCI->getOperand(0)->getType()) != cLong) + (getClassB(SCI->getOperand(0)->getType()) != cLong || + SCI->getOpcode() == Instruction::SetEQ || + SCI->getOpcode() == Instruction::SetNE)) return SCI; } return 0; @@ -846,11 +848,36 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1, unsigned HiTmp = Op0r+1; if (HiCst != 0) { HiTmp = makeAnotherReg(Type::IntTy); - BuildMI(*MBB, IP, X86::XOR32rr, 2,HiTmp).addReg(Op0r+1).addImm(HiCst); + BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst); } unsigned FinalTmp = makeAnotherReg(Type::IntTy); BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp); return OpNum; + } else { + // Emit a sequence of code which compares the high and low parts once + // each, then uses a conditional move to handle the overflow case. For + // example, a setlt for long would generate code like this: + // + // AL = lo(op1) < lo(op2) // Signedness depends on operands + // BL = hi(op1) < hi(op2) // Always unsigned comparison + // dest = hi(op1) == hi(op2) ? AL : BL; + // + + // FIXME: This would be much better if we had hierarchical register + // classes! Until then, hardcode registers so that we can deal with + // their aliases (because we don't have conditional byte moves). + // + BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst); + BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL); + BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst); + BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL); + BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH); + BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH); + BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX) + .addReg(X86::AX); + // NOTE: visitSetCondInst knows that the value is dumped into the BL + // register at this point for long values... + return OpNum; } } } diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp index 527bd1b135..05d2d887d1 100644 --- a/lib/Target/X86/X86ISelSimple.cpp +++ b/lib/Target/X86/X86ISelSimple.cpp @@ -756,7 +756,9 @@ static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) { Instruction *User = cast<Instruction>(SCI->use_back()); if ((isa<BranchInst>(User) || isa<SelectInst>(User)) && SCI->getParent() == User->getParent() && - getClassB(SCI->getOperand(0)->getType()) != cLong) + (getClassB(SCI->getOperand(0)->getType()) != cLong || + SCI->getOpcode() == Instruction::SetEQ || + SCI->getOpcode() == Instruction::SetNE)) return SCI; } return 0; @@ -846,11 +848,36 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1, unsigned HiTmp = Op0r+1; if (HiCst != 0) { HiTmp = makeAnotherReg(Type::IntTy); - BuildMI(*MBB, IP, X86::XOR32rr, 2,HiTmp).addReg(Op0r+1).addImm(HiCst); + BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst); } unsigned FinalTmp = makeAnotherReg(Type::IntTy); BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp); return OpNum; + } else { + // Emit a sequence of code which compares the high and low parts once + // each, then uses a conditional move to handle the overflow case. For + // example, a setlt for long would generate code like this: + // + // AL = lo(op1) < lo(op2) // Signedness depends on operands + // BL = hi(op1) < hi(op2) // Always unsigned comparison + // dest = hi(op1) == hi(op2) ? AL : BL; + // + + // FIXME: This would be much better if we had hierarchical register + // classes! Until then, hardcode registers so that we can deal with + // their aliases (because we don't have conditional byte moves). + // + BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst); + BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL); + BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst); + BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL); + BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH); + BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH); + BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX) + .addReg(X86::AX); + // NOTE: visitSetCondInst knows that the value is dumped into the BL + // register at this point for long values... + return OpNum; } } } |