diff options
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 86 | ||||
-rw-r--r-- | test/CodeGen/X86/crash-O0.ll | 31 |
2 files changed, 80 insertions, 37 deletions
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index b846e72ffd..a9603a331f 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1646,6 +1646,26 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
         SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
     }
 
+    // Prevent use of AH in a REX instruction by referencing AX instead.
+    if (HiReg == X86::AH && Subtarget->is64Bit() &&
+        !SDValue(Node, 1).use_empty()) {
+      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                              X86::AX, MVT::i16, InFlag);
+      InFlag = Result.getValue(2);
+      // Get the low part (AL, result 0) if needed by extracting it from AX.
+      // Don't use getCopyFromReg for aliasing registers.
+      if (!SDValue(Node, 0).use_empty())
+        ReplaceUses(SDValue(Node, 0),
+          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+      // Shift AX down 8 bits.
+      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+                                              Result,
+                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
+      // Then truncate it down to i8.
+      ReplaceUses(SDValue(Node, 1),
+        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+    }
     // Copy the low half of the result, if it is needed.
     if (!SDValue(Node, 0).use_empty()) {
       SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1656,24 +1676,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
     }
     // Copy the high half of the result, if it is needed.
     if (!SDValue(Node, 1).use_empty()) {
-      SDValue Result;
-      if (HiReg == X86::AH && Subtarget->is64Bit()) {
-        // Prevent use of AH in a REX instruction by referencing AX instead.
-        // Shift it down 8 bits.
-        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
-                                        X86::AX, MVT::i16, InFlag);
-        InFlag = Result.getValue(2);
-        Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
-                                                Result,
-                                   CurDAG->getTargetConstant(8, MVT::i8)), 0);
-        // Then truncate it down to i8.
-        Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
-                                                MVT::i8, Result);
-      } else {
-        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
-                                        HiReg, NVT, InFlag);
-        InFlag = Result.getValue(2);
-      }
+      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                              HiReg, NVT, InFlag);
+      InFlag = Result.getValue(2);
       ReplaceUses(SDValue(Node, 1), Result);
       DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
     }
@@ -1786,6 +1791,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
         SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
     }
 
+    // Prevent use of AH in a REX instruction by referencing AX instead.
+    // Shift it down 8 bits.
+    if (HiReg == X86::AH && Subtarget->is64Bit() &&
+        !SDValue(Node, 1).use_empty()) {
+      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                              X86::AX, MVT::i16, InFlag);
+      InFlag = Result.getValue(2);
+
+      // If we also need AL (the quotient), get it by extracting a subreg from
+      // Result. The fast register allocator does not like multiple CopyFromReg
+      // nodes using aliasing registers.
+      if (!SDValue(Node, 0).use_empty())
+        ReplaceUses(SDValue(Node, 0),
+          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+      // Shift AX right by 8 bits instead of using AH.
+      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+                                              Result,
+                                     CurDAG->getTargetConstant(8, MVT::i8)),
+                       0);
+      ReplaceUses(SDValue(Node, 1),
+        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+    }
     // Copy the division (low) result, if it is needed.
     if (!SDValue(Node, 0).use_empty()) {
       SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1796,25 +1824,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
     }
     // Copy the remainder (high) result, if it is needed.
     if (!SDValue(Node, 1).use_empty()) {
-      SDValue Result;
-      if (HiReg == X86::AH && Subtarget->is64Bit()) {
-        // Prevent use of AH in a REX instruction by referencing AX instead.
-        // Shift it down 8 bits.
-        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
-                                        X86::AX, MVT::i16, InFlag);
-        InFlag = Result.getValue(2);
-        Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
-                                                Result,
-                                   CurDAG->getTargetConstant(8, MVT::i8)),
-                         0);
-        // Then truncate it down to i8.
-        Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
-                                                MVT::i8, Result);
-      } else {
-        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
-                                        HiReg, NVT, InFlag);
-        InFlag = Result.getValue(2);
-      }
+      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                              HiReg, NVT, InFlag);
+      InFlag = Result.getValue(2);
       ReplaceUses(SDValue(Node, 1), Result);
       DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
     }
diff --git a/test/CodeGen/X86/crash-O0.ll b/test/CodeGen/X86/crash-O0.ll
new file mode 100644
index 0000000000..956d43b4e8
--- /dev/null
+++ b/test/CodeGen/X86/crash-O0.ll
@@ -0,0 +1,31 @@
+; RUN: llc -O0 -relocation-model=pic -disable-fp-elim < %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10"
+
+; This file contains functions that may crash llc -O0
+
+; The DIV8 instruction produces results in AH and AL, but we don't want to use
+; AH in 64-bit mode. The hack used must not generate copyFromReg nodes for
+; aliased registers (AX and AL) - RegAllocFast does not like that.
+; PR7312
+define i32 @div8() nounwind {
+entry:
+  %0 = trunc i64 undef to i8                      ; <i8> [#uses=3]
+  %1 = udiv i8 0, %0                              ; <i8> [#uses=1]
+  %2 = urem i8 0, %0                              ; <i8> [#uses=1]
+  %3 = icmp uge i8 %2, %0                         ; <i1> [#uses=1]
+  br i1 %3, label %"40", label %"39"
+
+"39":                                             ; preds = %"36"
+  %4 = zext i8 %1 to i32                          ; <i32> [#uses=1]
+  %5 = mul nsw i32 %4, undef                      ; <i32> [#uses=1]
+  %6 = add nsw i32 %5, undef                      ; <i32> [#uses=1]
+  %7 = icmp ne i32 %6, undef                      ; <i1> [#uses=1]
+  br i1 %7, label %"40", label %"41"
+
+"40":                                             ; preds = %"39", %"36"
+  unreachable
+
+"41":                                             ; preds = %"39"
+  unreachable
+}