diff options
author | Derek Schuff <dschuff@chromium.org> | 2012-10-01 11:20:30 -0700 |
---|---|---|
committer | Derek Schuff <dschuff@chromium.org> | 2012-10-01 11:20:30 -0700 |
commit | b3423dd295c69f78bd731f1ad65ad90ce3efa36f (patch) | |
tree | 0e872df2f0333ed1806d9e0a6906b2f5ebd58512 /lib/Target/X86 | |
parent | a27c28b1427dc2082ab2b31efdbb25f9fde31b61 (diff) | |
parent | 72f0976c1b91c7ba50dce4d0ad0289dc14d37f81 (diff) |
Merge commit '72f0976c1b91c7ba50dce4d0ad0289dc14d37f81'
Conflicts:
lib/Target/ARM/ARMISelDAGToDAG.cpp
lib/Target/Mips/MipsISelLowering.cpp
lib/Target/Mips/MipsSubtarget.cpp
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/AsmParser/X86AsmParser.cpp | 23 | ||||
-rw-r--r-- | lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 7 | ||||
-rw-r--r-- | lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 7 | ||||
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 5 | ||||
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 48 | ||||
-rw-r--r-- | lib/Target/X86/X86FrameLowering.cpp | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 149 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 100 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 5 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrCompiler.td | 57 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 18 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 12 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrShiftRotate.td | 78 | ||||
-rw-r--r-- | lib/Target/X86/X86RegisterInfo.cpp | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 1 |
15 files changed, 407 insertions, 107 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 77961e53ae..9263bdde20 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -60,10 +60,6 @@ private: bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); - bool mnemonicIsValid(StringRef Mnemonic) { - return mnemonicIsValidImpl(Mnemonic); - } - bool processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); @@ -77,13 +73,6 @@ private: unsigned &OrigErrorInfo, bool matchingInlineAsm = false); - unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands) { - return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, - NumMCOperands); - } - /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. bool isSrcOp(X86Operand &Op); @@ -1636,16 +1625,20 @@ MatchInstruction(SMLoc IDLoc, unsigned &Kind, unsigned Match1, Match2, Match3, Match4; unsigned tKind; - Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match1 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[1]; - Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match2 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[2]; - Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match3 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[3]; - Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match4 == Match_Success) Kind = tKind; // Restore the old token. diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 46e72f9f60..b123afa001 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" #include "X86InstComments.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" @@ -38,6 +39,12 @@ void X86ATTInstPrinter::printRegName(raw_ostream &OS, void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + if (TSFlags & X86II::LOCK) + OS << "\tlock\n"; + // Try to print any aliases first. if (!printAliasInstr(MI, OS)) printInstruction(MI, OS); diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index ad14e34707..f9bb3be9d7 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" #include "X86InstComments.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCExpr.h" @@ -32,6 +33,12 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + if (TSFlags & X86II::LOCK) + OS << "\tlock\n"; + printInstruction(MI, OS); // Next always print the annotation. diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 3c0e3e6f2d..7706b9308e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -34,6 +34,10 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(ATT), clEnumValN(Intel, "intel", "Emit Intel-style assembly"), clEnumValEnd)); +static cl::opt<bool> +MarkedJTDataRegions("mark-data-regions", cl::init(false), + cl::desc("Mark code section jump table data regions."), + cl::Hidden); void X86MCAsmInfoDarwin::anchor() { } @@ -59,6 +63,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { SupportsDebugInformation = true; DwarfUsesInlineInfoSection = true; + UseDataRegionDirectives = MarkedJTDataRegions; // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index f0f1982d57..7ff058edbc 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -11,11 +11,13 @@ #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Object/MachOFormat.h" using namespace llvm; @@ -23,7 +25,7 @@ using namespace llvm::object; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { - void RecordScatteredRelocation(MachObjectWriter *Writer, + bool RecordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -335,7 +337,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, Writer->addRelocation(Fragment->getParent(), MRE); } -void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, +bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -381,6 +383,19 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // Relocations are written out in reverse order, so the PAIR comes first. if (Type == macho::RIT_Difference || Type == macho::RIT_Generic_LocalDifference) { + // If the offset is too large to fit in a scattered relocation, + // we're hosed. It's an unfortunate limitation of the MachO format. + if (FixupOffset > 0xffffff) { + char Buffer[32]; + format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer)); + Asm.getContext().FatalError(Fixup.getLoc(), + Twine("Section too large, can't encode " + "r_address (") + Buffer + + ") into 24 bits of scattered " + "relocation entry."); + llvm_unreachable("fatal error returned?!"); + } + macho::RelocationEntry MRE; MRE.Word0 = ((0 << 0) | (macho::RIT_Pair << 24) | @@ -389,6 +404,16 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, macho::RF_Scattered); MRE.Word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); + } else { + // If the offset is more than 24-bits, it won't fit in a scattered + // relocation offset field, so we fall back to using a non-scattered + // relocation. This is a bit risky, as if the offset reaches out of + // the block and the linker is doing scattered loading on this + // symbol, things can go badly. + // + // Required for 'as' compatibility. + if (FixupOffset > 0xffffff) + return false; } macho::RelocationEntry MRE; @@ -399,6 +424,7 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, macho::RF_Scattered); MRE.Word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); + return true; } void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, @@ -469,9 +495,11 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // If this is a difference or a defined symbol plus an offset, then we need a // scattered relocation entry. Differences always require scattered // relocations. - if (Target.getSymB()) - return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + if (Target.getSymB()) { + RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + return; + } // Get the symbol data, if any. MCSymbolData *SD = 0; @@ -483,9 +511,13 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, uint32_t Offset = Target.getConstant(); if (IsPCRel) Offset += 1 << Log2Size; - if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) - return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + // Try to record the scattered relocation if needed. Fall back to non + // scattered if necessary (see comments in RecordScatteredRelocation() + // for details). + if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD) && + RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue)) + return; // See <reloc.h>. uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 0d8def0e47..85922f1277 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -676,7 +676,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the // stack pointer (we fit in the Red Zone). - if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && + if (Is64Bit && !Fn->getFnAttributes().hasNoRedZoneAttr() && !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index b409e88148..767e261a82 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -443,7 +443,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. - OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + OptForSize = MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { @@ -2253,6 +2253,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: case X86ISD::ATOMNAND64_DAG: case X86ISD::ATOMAND64_DAG: + case X86ISD::ATOMMAX64_DAG: + case X86ISD::ATOMMIN64_DAG: + case X86ISD::ATOMUMAX64_DAG: + case X86ISD::ATOMUMIN64_DAG: case X86ISD::ATOMSWAP64_DAG: { unsigned Opc; switch (Opcode) { @@ -2263,6 +2267,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; + case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break; + case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break; + case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break; + case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break; case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; } SDNode *RetVal = SelectAtomic64(Node, Opc); @@ -2389,13 +2397,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); bool isSigned = Opcode == ISD::SMUL_LOHI; + bool hasBMI2 = Subtarget->hasBMI2(); if (!isSigned) { switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; - case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; - case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; + case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r; + MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break; + case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r; + MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break; } } else { switch (NVT.getSimpleVT().SimpleTy) { @@ -2407,13 +2418,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } } - unsigned LoReg, HiReg; - switch (NVT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unsupported VT!"); - case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; - case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; - case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; - case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; + unsigned SrcReg, LoReg, HiReg; + switch (Opc) { + default: llvm_unreachable("Unknown MUL opcode!"); + case X86::IMUL8r: + case X86::MUL8r: + SrcReg = LoReg = X86::AL; HiReg = X86::AH; + break; + case X86::IMUL16r: + case X86::MUL16r: + SrcReg = LoReg = X86::AX; HiReg = X86::DX; + break; + case X86::IMUL32r: + case X86::MUL32r: + SrcReg = LoReg = X86::EAX; HiReg = X86::EDX; + break; + case X86::IMUL64r: + case X86::MUL64r: + SrcReg = LoReg = X86::RAX; HiReg = X86::RDX; + break; + case X86::MULX32rr: + SrcReg = X86::EDX; LoReg = HiReg = 0; + break; + case X86::MULX64rr: + SrcReg = X86::RDX; LoReg = HiReg = 0; + break; } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; @@ -2425,22 +2454,47 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { std::swap(N0, N1); } - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg, N0, SDValue()).getValue(1); + SDValue ResHi, ResLo; if (foldedLoad) { + SDValue Chain; SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; - SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); - InFlag = SDValue(CNode, 1); + if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, + array_lengthof(Ops)); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + Chain = SDValue(CNode, 2); + InFlag = SDValue(CNode, 3); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, + array_lengthof(Ops)); + Chain = SDValue(CNode, 0); + InFlag = SDValue(CNode, 1); + } // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + ReplaceUses(N1.getValue(1), Chain); } else { - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag); - InFlag = SDValue(CNode, 0); + SDValue Ops[] = { N1, InFlag }; + if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, + array_lengthof(Ops)); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + InFlag = SDValue(CNode, 2); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, + array_lengthof(Ops)); + InFlag = SDValue(CNode, 0); + } } // Prevent use of AH in a REX instruction by referencing AX instead. @@ -2465,19 +2519,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the low half of the result, if it is needed. if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResLo.getNode() == 0) { + assert(LoReg && "Register for low half is not defined!"); + ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, + InFlag); + InFlag = ResLo.getValue(2); + } + ReplaceUses(SDValue(Node, 0), ResLo); + DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResHi.getNode() == 0) { + assert(HiReg && "Register for high half is not defined!"); + ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, + InFlag); + InFlag = ResHi.getValue(2); + } + ReplaceUses(SDValue(Node, 1), ResHi); + DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } return NULL; @@ -2678,7 +2738,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i8, Reg); // Emit a testb. - return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $2048" to "testb %ah, $8". @@ -2709,8 +2775,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only // target GR8_NOREX registers, so make sure the register class is // forced. - return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32, - Subreg, ShiftedImm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, + MVT::i32, Subreg, ShiftedImm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $32776" to "testw %ax, $32776". @@ -2726,7 +2797,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i16, Reg); // Emit a testw. - return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testq %rax, $268468232" to "testl %eax, $268468232". @@ -2742,7 +2819,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i32, Reg); // Emit a testl. - return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } } break; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bdfe245027..ffaf04cea7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -522,6 +522,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); } if (Subtarget->hasCmpxchg16b()) { @@ -1357,7 +1361,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, // cases like PR2962. This should be removed when PR2962 is fixed. const Function *F = MF.getFunction(); if (IsZeroVal && - !F->hasFnAttr(Attribute::NoImplicitFloat)) { + !F->getFnAttributes().hasNoImplicitFloatAttr()) { if (Size >= 16 && (Subtarget->isUnalignedMemAccessFast() || ((DstAlign == 0 || DstAlign >= 16) && @@ -2048,7 +2052,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, TotalNumIntRegs); - bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat); + bool NoImplicitFloatOps = Fn->getFnAttributes().hasNoImplicitFloatAttr(); assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat && @@ -2240,7 +2244,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, SR != NotStructReturn, - MF.getFunction()->hasStructRetAttr(), + MF.getFunction()->hasStructRetAttr(), CLI.RetTy, Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require @@ -2524,7 +2528,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, OpFlags = X86II::MO_DARWIN_STUB; } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) && - cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) { + cast<Function>(GV)->getFnAttributes().hasNonLazyBindAttr()) { // If the function is marked as non-lazy, generate an indirect call // which loads from the GOT directly. This avoids runtime overhead // at the cost of eager binding (and one extra byte of encoding). @@ -2761,6 +2765,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, + Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -2772,6 +2777,13 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // If -tailcallopt is specified, make fastcc functions tail-callable. const MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = DAG.getMachineFunction().getFunction(); + + // If the function return type is x86_fp80 and the callee return type is not, + // then the FP_EXTEND of the call result is not a nop. It's not safe to + // perform a tailcall optimization here. + if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) + return false; + CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; @@ -6661,7 +6673,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool HasAVX = Subtarget->hasAVX(); bool HasAVX2 = Subtarget->hasAVX2(); MachineFunction &MF = DAG.getMachineFunction(); - bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptForSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); @@ -9783,7 +9795,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // Sanity Check: Make sure using fp_offset makes sense. assert(!getTargetMachine().Options.UseSoftFloat && !(DAG.getMachineFunction() - .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) && + .getFunction()->getFnAttributes().hasNoImplicitFloatAttr()) && Subtarget->hasSSE1()); } @@ -11769,6 +11781,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_SWAP: { unsigned Opc; switch (N->getOpcode()) { @@ -11791,6 +11807,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_LOAD_XOR: Opc = X86ISD::ATOMXOR64_DAG; break; + case ISD::ATOMIC_LOAD_MAX: + Opc = X86ISD::ATOMMAX64_DAG; + break; + case ISD::ATOMIC_LOAD_MIN: + Opc = X86ISD::ATOMMIN64_DAG; + break; + case ISD::ATOMIC_LOAD_UMAX: + Opc = X86ISD::ATOMUMAX64_DAG; + break; + case ISD::ATOMIC_LOAD_UMIN: + Opc = X86ISD::ATOMUMIN64_DAG; + break; case ISD::ATOMIC_SWAP: Opc = X86ISD::ATOMSWAP64_DAG; break; @@ -12182,6 +12210,10 @@ static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) { case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr; case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr; case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr; + case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr; + case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr; + case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr; + case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr; } llvm_unreachable("Unhandled atomic-load-op opcode!"); } @@ -12499,6 +12531,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, SrcHiReg = MI->getOperand(CurOp++).getReg(); const TargetRegisterClass *RC = &X86::GR32RegClass; + const TargetRegisterClass *RC8 = &X86::GR8RegClass; unsigned LCMPXCHGOpc = X86::LCMPXCHG8B; unsigned LOADOpc = X86::MOV32rm; @@ -12586,6 +12619,55 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H); break; } + case X86::ATOMMAX6432: + case X86::ATOMMIN6432: + case X86::ATOMUMAX6432: + case X86::ATOMUMIN6432: { + unsigned HiOpc; + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); + unsigned cL = MRI.createVirtualRegister(RC8); + unsigned cH = MRI.createVirtualRegister(RC8); + unsigned cL32 = MRI.createVirtualRegister(RC); + unsigned cH32 = MRI.createVirtualRegister(RC); + unsigned cc = MRI.createVirtualRegister(RC); + // cl := cmp src_lo, lo + BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) + .addReg(SrcLoReg).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(LoOpc), cL); + BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL); + // ch := cmp src_hi, hi + BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) + .addReg(SrcHiReg).addReg(HiReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), cH); + BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH); + // cc := if (src_hi == hi) ? cl : ch; + if (Subtarget->hasCMov()) { + BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc) + .addReg(cH32).addReg(cL32); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc) + .addReg(cH32).addReg(cL32) + .addImm(X86::COND_E); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + } + BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc); + if (Subtarget->hasCMov()) { + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L) + .addReg(SrcLoReg).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H) + .addReg(SrcHiReg).addReg(HiReg); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L) + .addReg(SrcLoReg).addReg(LoReg) + .addImm(X86::COND_NE); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H) + .addReg(SrcHiReg).addReg(HiReg) + .addImm(X86::COND_NE); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + } + break; + } case X86::ATOMSWAP6432: { unsigned HiOpc; unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); @@ -13576,6 +13658,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMNAND6432: case X86::ATOMADD6432: case X86::ATOMSUB6432: + case X86::ATOMMAX6432: + case X86::ATOMMIN6432: + case X86::ATOMUMAX6432: + case X86::ATOMUMIN6432: case X86::ATOMSWAP6432: return EmitAtomicLoadArith6432(MI, BB); @@ -15562,7 +15648,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); const Function *F = DAG.getMachineFunction().getFunction(); - bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); + bool NoImplicitFloatOps = F->getFnAttributes().hasNoImplicitFloatAttr(); bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasSSE2(); if ((VT.isVector() || diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d3545b0e9f..a53909b7a0 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -355,6 +355,10 @@ namespace llvm { ATOMXOR64_DAG, ATOMAND64_DAG, ATOMNAND64_DAG, |