Fix a minor bug in the previous checkin

Enable folding of long seteq/setne comparisons into branches and select instructions. Implement unfolded long relational comparisons against constants a bit more efficiently. Folding comparisons changes code that looks like this: mov %EAX, DWORD PTR [%ESP + 4] mov %EDX, DWORD PTR [%ESP + 8] mov %ECX, %EAX or %ECX, %EDX sete %CL test %CL, %CL je .LBB2 # PC rel: F into code that looks like this: mov %EAX, DWORD PTR [%ESP + 4] mov %EDX, DWORD PTR [%ESP + 8] mov %ECX, %EAX or %ECX, %EDX jne .LBB2 # PC rel: F This speeds up 186.crafty by 6% with llc-ls. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@12702 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chris Lattner <sabre@nondot.org> 2004-04-06 17:34:50 +0000
committer: Chris Lattner <sabre@nondot.org> 2004-04-06 17:34:50 +0000
commit: 48c937e5c9808cc63d2cd04d823642d875c6bf0a (patch)
tree: bf8daa41d5ec17f148e59969c0f2d8e467566ee0 /lib
parent: b04da8a3c6488fe396c6d61eec9980598b8c84ae (diff)
2 files changed, 58 insertions, 4 deletions
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index 527bd1b135..05d2d887d1 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -756,7 +756,9 @@ static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) {
       Instruction *User = cast<Instruction>(SCI->use_back());
       if ((isa<BranchInst>(User) || isa<SelectInst>(User)) &&
           SCI->getParent() == User->getParent() &&
-          getClassB(SCI->getOperand(0)->getType()) != cLong)
+          (getClassB(SCI->getOperand(0)->getType()) != cLong ||
+           SCI->getOpcode() == Instruction::SetEQ ||
+           SCI->getOpcode() == Instruction::SetNE))
         return SCI;
     }
   return 0;
@@ -846,11 +848,36 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
         unsigned HiTmp = Op0r+1;
         if (HiCst != 0) {
           HiTmp = makeAnotherReg(Type::IntTy);
-          BuildMI(*MBB, IP, X86::XOR32rr, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
+          BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
         }
         unsigned FinalTmp = makeAnotherReg(Type::IntTy);
         BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
         return OpNum;
+      } else {
+        // Emit a sequence of code which compares the high and low parts once
+        // each, then uses a conditional move to handle the overflow case.  For
+        // example, a setlt for long would generate code like this:
+        //
+        // AL = lo(op1) < lo(op2)   // Always unsigned comparison
+        // BL = hi(op1) < hi(op2)   // Signedness depends on operands
+        // dest = hi(op1) == hi(op2) ? AL : BL;
+        //
+
+        // FIXME: This would be much better if we had hierarchical register
+        // classes!  Until then, hardcode registers so that we can deal with
+        // their aliases (because we don't have conditional byte moves).
+        //
+        BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst);
+        BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
+        BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst);
+        BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL);
+        BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
+        BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
+        BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
+          .addReg(X86::AX);
+        // NOTE: visitSetCondInst knows that the value is dumped into the BL
+        // register at this point for long values...
+        return OpNum;
       }
     }
   }
diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp
index 527bd1b135..05d2d887d1 100644
--- a/lib/Target/X86/X86ISelSimple.cpp
+++ b/lib/Target/X86/X86ISelSimple.cpp
@@ -756,7 +756,9 @@ static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) {
       Instruction *User = cast<Instruction>(SCI->use_back());
       if ((isa<BranchInst>(User) || isa<SelectInst>(User)) &&
           SCI->getParent() == User->getParent() &&
-          getClassB(SCI->getOperand(0)->getType()) != cLong)
+          (getClassB(SCI->getOperand(0)->getType()) != cLong ||
+           SCI->getOpcode() == Instruction::SetEQ ||
+           SCI->getOpcode() == Instruction::SetNE))
         return SCI;
     }
   return 0;
@@ -846,11 +848,36 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
         unsigned HiTmp = Op0r+1;
         if (HiCst != 0) {
           HiTmp = makeAnotherReg(Type::IntTy);
-          BuildMI(*MBB, IP, X86::XOR32rr, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
+          BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
         }
         unsigned FinalTmp = makeAnotherReg(Type::IntTy);
         BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
         return OpNum;
+      } else {
+        // Emit a sequence of code which compares the high and low parts once
+        // each, then uses a conditional move to handle the overflow case.  For
+        // example, a setlt for long would generate code like this:
+        //
+        // AL = lo(op1) < lo(op2)   // Always unsigned comparison
+        // BL = hi(op1) < hi(op2)   // Signedness depends on operands
+        // dest = hi(op1) == hi(op2) ? AL : BL;
+        //
+
+        // FIXME: This would be much better if we had hierarchical register
+        // classes!  Until then, hardcode registers so that we can deal with
+        // their aliases (because we don't have conditional byte moves).
+        //
+        BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst);
+        BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
+        BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst);
+        BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL);
+        BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
+        BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
+        BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
+          .addReg(X86::AX);
+        // NOTE: visitSetCondInst knows that the value is dumped into the BL
+        // register at this point for long values...
+        return OpNum;
       }
     }
   }
author	Chris Lattner <sabre@nondot.org>	2004-04-06 17:34:50 +0000
committer	Chris Lattner <sabre@nondot.org>	2004-04-06 17:34:50 +0000
commit	48c937e5c9808cc63d2cd04d823642d875c6bf0a (patch)
tree	bf8daa41d5ec17f148e59969c0f2d8e467566ee0 /lib
parent	b04da8a3c6488fe396c6d61eec9980598b8c84ae (diff)