author    Derek Schuff <dschuff@chromium.org>  2012-10-01 11:20:30 -0700
committer Derek Schuff <dschuff@chromium.org>  2012-10-01 11:20:30 -0700
commit    b3423dd295c69f78bd731f1ad65ad90ce3efa36f (patch)
tree      0e872df2f0333ed1806d9e0a6906b2f5ebd58512 /lib/Target/X86
parent    a27c28b1427dc2082ab2b31efdbb25f9fde31b61 (diff)
parent    72f0976c1b91c7ba50dce4d0ad0289dc14d37f81 (diff)
Merge commit '72f0976c1b91c7ba50dce4d0ad0289dc14d37f81'

Conflicts:
	lib/Target/ARM/ARMISelDAGToDAG.cpp
	lib/Target/Mips/MipsISelLowering.cpp
	lib/Target/Mips/MipsSubtarget.cpp
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp           |  23
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp    |   7
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp  |   7
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp        |   5
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp |  48
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp                 |   2
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp                  | 149
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp                  | 100
-rw-r--r--  lib/Target/X86/X86ISelLowering.h                    |   5
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td                  |  57
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp                     |  18
-rw-r--r--  lib/Target/X86/X86InstrInfo.td                      |  12
-rw-r--r--  lib/Target/X86/X86InstrShiftRotate.td               |  78
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp                  |   2
-rw-r--r--  lib/Target/X86/X86Subtarget.h                       |   1
15 files changed, 407 insertions, 107 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 77961e53ae..9263bdde20 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -60,10 +60,6 @@ private:
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
- bool mnemonicIsValid(StringRef Mnemonic) {
- return mnemonicIsValidImpl(Mnemonic);
- }
-
bool processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
@@ -77,13 +73,6 @@ private:
unsigned &OrigErrorInfo,
bool matchingInlineAsm = false);
- unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- unsigned OperandNum, unsigned &NumMCOperands) {
- return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum,
- NumMCOperands);
- }
-
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
/// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
bool isSrcOp(X86Operand &Op);
@@ -1636,16 +1625,20 @@ MatchInstruction(SMLoc IDLoc, unsigned &Kind,
unsigned Match1, Match2, Match3, Match4;
unsigned tKind;
- Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
if (Match1 == Match_Success) Kind = tKind;
Tmp[Base.size()] = Suffixes[1];
- Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
if (Match2 == Match_Success) Kind = tKind;
Tmp[Base.size()] = Suffixes[2];
- Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
if (Match3 == Match_Success) Kind = tKind;
Tmp[Base.size()] = Suffixes[3];
- Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore,
+ isParsingIntelSyntax());
if (Match4 == Match_Success) Kind = tKind;
// Restore the old token.
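Note: the unrolled Match1..Match4 sequence above implements AT&T size-suffix retry — the parser appends each size suffix to the base mnemonic and rematches, now also threading through whether Intel syntax is being parsed. A compact sketch of the same strategy, with matchImpl standing in for the generated MatchInstructionImpl (names hypothetical):

    #include <string>

    enum MatchResult { Match_Success, Match_MnemonicFail };

    // Stand-in for the generated matcher; the real one also fills in the
    // MCInst and error info.
    MatchResult matchImpl(const std::string &Mnemonic, bool IsIntelSyntax);

    MatchResult matchWithSuffixes(const std::string &Base, bool IsIntelSyntax) {
      for (char S : {'b', 'w', 'l', 'q'}) {  // the candidate size suffixes
        if (matchImpl(Base + S, IsIntelSyntax) == Match_Success)
          return Match_Success;
      }
      return Match_MnemonicFail;
    }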
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 46e72f9f60..b123afa001 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
#include "X86InstComments.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -38,6 +39,12 @@ void X86ATTInstPrinter::printRegName(raw_ostream &OS,
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::LOCK)
+ OS << "\tlock\n";
+
// Try to print any aliases first.
if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
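Note: the new TSFlags check makes the printer emit the lock prefix on its own line ahead of the instruction it guards. A minimal sketch of the gating, assuming a descriptor carrying X86II::LOCK (the bit value below is illustrative, not the real TSFlags encoding):

    #include <cstdint>
    #include <iostream>

    namespace X86II { enum { LOCK = 1u << 0 }; } // illustrative bit only

    void printWithPrefix(uint64_t TSFlags, const char *Mnemonic) {
      if (TSFlags & X86II::LOCK)
        std::cout << "\tlock\n";   // prefix first, on its own line
      std::cout << '\t' << Mnemonic << '\n';
    }

    // printWithPrefix(X86II::LOCK, "incl (%rdi)") prints:
    //         lock
    //         incl (%rdi)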
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index ad14e34707..f9bb3be9d7 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
#include "X86InstComments.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCExpr.h"
@@ -32,6 +33,12 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::LOCK)
+ OS << "\tlock\n";
+
printInstruction(MI, OS);
// Next always print the annotation.
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 3c0e3e6f2d..7706b9308e 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -34,6 +34,10 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(ATT),
clEnumValN(Intel, "intel", "Emit Intel-style assembly"),
clEnumValEnd));
+static cl::opt<bool>
+MarkedJTDataRegions("mark-data-regions", cl::init(false),
+ cl::desc("Mark code section jump table data regions."),
+ cl::Hidden);
void X86MCAsmInfoDarwin::anchor() { }
@@ -59,6 +63,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
SupportsDebugInformation = true;
DwarfUsesInlineInfoSection = true;
+ UseDataRegionDirectives = MarkedJTDataRegions;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
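Note: with -mark-data-regions set, UseDataRegionDirectives makes the streamer bracket jump-table data emitted into the text section so the Darwin linker can tell it apart from code. A hypothetical fragment of the resulting assembly (table contents invented for illustration; the directive pair is the standard Darwin one):

            .data_region jt32
    LJTI0_0:
            .long   LBB0_1-LJTI0_0
            .long   LBB0_2-LJTI0_0
            .end_data_region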
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index f0f1982d57..7ff058edbc 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -11,11 +11,13 @@
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Object/MachOFormat.h"
using namespace llvm;
@@ -23,7 +25,7 @@ using namespace llvm::object;
namespace {
class X86MachObjectWriter : public MCMachObjectTargetWriter {
- void RecordScatteredRelocation(MachObjectWriter *Writer,
+ bool RecordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -335,7 +337,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
Writer->addRelocation(Fragment->getParent(), MRE);
}
-void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -381,6 +383,19 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
// Relocations are written out in reverse order, so the PAIR comes first.
if (Type == macho::RIT_Difference ||
Type == macho::RIT_Generic_LocalDifference) {
+ // If the offset is too large to fit in a scattered relocation,
+ // we're hosed. It's an unfortunate limitation of the MachO format.
+ if (FixupOffset > 0xffffff) {
+ char Buffer[32];
+ format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ Twine("Section too large, can't encode "
+ "r_address (") + Buffer +
+ ") into 24 bits of scattered "
+ "relocation entry.");
+ llvm_unreachable("fatal error returned?!");
+ }
+
macho::RelocationEntry MRE;
MRE.Word0 = ((0 << 0) |
(macho::RIT_Pair << 24) |
@@ -389,6 +404,16 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
macho::RF_Scattered);
MRE.Word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
+ } else {
+ // If the offset is more than 24-bits, it won't fit in a scattered
+ // relocation offset field, so we fall back to using a non-scattered
+ // relocation. This is a bit risky, as if the offset reaches out of
+ // the block and the linker is doing scattered loading on this
+ // symbol, things can go badly.
+ //
+ // Required for 'as' compatibility.
+ if (FixupOffset > 0xffffff)
+ return false;
}
macho::RelocationEntry MRE;
@@ -399,6 +424,7 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
macho::RF_Scattered);
MRE.Word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
+ return true;
}
void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
@@ -469,9 +495,11 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// If this is a difference or a defined symbol plus an offset, then we need a
// scattered relocation entry. Differences always require scattered
// relocations.
- if (Target.getSymB())
- return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ if (Target.getSymB()) {
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+ return;
+ }
// Get the symbol data, if any.
MCSymbolData *SD = 0;
@@ -483,9 +511,13 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
uint32_t Offset = Target.getConstant();
if (IsPCRel)
Offset += 1 << Log2Size;
- if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
- return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ // Try to record the scattered relocation if needed. Fall back to non
+ // scattered if necessary (see comments in RecordScatteredRelocation()
+ // for details).
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD) &&
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue))
+ return;
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
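Note: the 24-bit ceiling both hunks test against comes from the scattered-relocation encoding itself: r_address shares Word0 with the type, length, pc-rel, and scattered bits, leaving only bits 0-23 for the offset. A sketch of the packing, following the Word0 construction visible above:

    #include <cstdint>

    // Word0 of a scattered MachO relocation entry. Offsets above 0xffffff
    // are unrepresentable, hence the FatalError for difference relocations
    // (which have no other encoding) and the non-scattered fallback for
    // local relocations (where one exists).
    uint32_t scatteredWord0(uint32_t FixupOffset, unsigned Type,
                            unsigned Log2Size, bool IsPCRel) {
      // Caller must ensure FixupOffset <= 0xffffff: r_address has 24 bits.
      return (FixupOffset << 0) |
             ((Type & 0xf) << 24) |
             ((Log2Size & 0x3) << 28) |
             ((IsPCRel ? 1u : 0u) << 30) |
             (1u << 31); // RF_Scattered
    }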
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 0d8def0e47..85922f1277 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -676,7 +676,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone).
- if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
+ if (Is64Bit && !Fn->getFnAttributes().hasNoRedZoneAttr() &&
!RegInfo->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index b409e88148..767e261a82 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -443,7 +443,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ OptForSize = MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
@@ -2253,6 +2253,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::ATOMSUB64_DAG:
case X86ISD::ATOMNAND64_DAG:
case X86ISD::ATOMAND64_DAG:
+ case X86ISD::ATOMMAX64_DAG:
+ case X86ISD::ATOMMIN64_DAG:
+ case X86ISD::ATOMUMAX64_DAG:
+ case X86ISD::ATOMUMIN64_DAG:
case X86ISD::ATOMSWAP64_DAG: {
unsigned Opc;
switch (Opcode) {
@@ -2263,6 +2267,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
+ case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break;
+ case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break;
+ case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
+ case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
}
SDNode *RetVal = SelectAtomic64(Node, Opc);
@@ -2389,13 +2397,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N1 = Node->getOperand(1);
bool isSigned = Opcode == ISD::SMUL_LOHI;
+ bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
- case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
- case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+ case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
+ MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
+ case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
+ MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
}
} else {
switch (NVT.getSimpleVT().SimpleTy) {
@@ -2407,13 +2418,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
}
- unsigned LoReg, HiReg;
- switch (NVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unsupported VT!");
- case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
- case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
- case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
- case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ unsigned SrcReg, LoReg, HiReg;
+ switch (Opc) {
+ default: llvm_unreachable("Unknown MUL opcode!");
+ case X86::IMUL8r:
+ case X86::MUL8r:
+ SrcReg = LoReg = X86::AL; HiReg = X86::AH;
+ break;
+ case X86::IMUL16r:
+ case X86::MUL16r:
+ SrcReg = LoReg = X86::AX; HiReg = X86::DX;
+ break;
+ case X86::IMUL32r:
+ case X86::MUL32r:
+ SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
+ break;
+ case X86::IMUL64r:
+ case X86::MUL64r:
+ SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
+ break;
+ case X86::MULX32rr:
+ SrcReg = X86::EDX; LoReg = HiReg = 0;
+ break;
+ case X86::MULX64rr:
+ SrcReg = X86::RDX; LoReg = HiReg = 0;
+ break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2425,22 +2454,47 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
std::swap(N0, N1);
}
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
N0, SDValue()).getValue(1);
+ SDValue ResHi, ResLo;
if (foldedLoad) {
+ SDValue Chain;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
- SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
- array_lengthof(Ops));
- InFlag = SDValue(CNode, 1);
+ if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ Chain = SDValue(CNode, 2);
+ InFlag = SDValue(CNode, 3);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ Chain = SDValue(CNode, 0);
+ InFlag = SDValue(CNode, 1);
+ }
// Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ ReplaceUses(N1.getValue(1), Chain);
} else {
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
- InFlag = SDValue(CNode, 0);
+ SDValue Ops[] = { N1, InFlag };
+ if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ InFlag = SDValue(CNode, 2);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 0);
+ }
}
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -2465,19 +2519,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 0), Result);
- DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ if (ResLo.getNode() == 0) {
+ assert(LoReg && "Register for low half is not defined!");
+ ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
+ InFlag);
+ InFlag = ResLo.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 0), ResLo);
+ DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 1), Result);
- DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ if (ResHi.getNode() == 0) {
+ assert(HiReg && "Register for high half is not defined!");
+ ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
+ InFlag);
+ InFlag = ResHi.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 1), ResHi);
+ DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
}
return NULL;
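Note on the register plumbing above: MULX differs from MUL in that its implicit source lives in EDX/RDX rather than EAX/RAX, and it writes both halves of the product to explicit destinations instead of the fixed EDX:EAX pair — which is why LoReg/HiReg are 0 and ResLo/ResHi come straight off the machine node. It also leaves EFLAGS untouched. A sketch of the operation MULX32rr performs:

    #include <cstdint>

    // 32x32 -> 64 unsigned multiply, split into the explicit halves MULX
    // returns. Src1 models the implicit EDX operand.
    void mulx32(uint32_t Src1, uint32_t Src2, uint32_t &Hi, uint32_t &Lo) {
      uint64_t Product = (uint64_t)Src1 * (uint64_t)Src2;
      Lo = (uint32_t)Product;
      Hi = (uint32_t)(Product >> 32);
    }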
@@ -2678,7 +2738,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MVT::i8, Reg);
// Emit a testb.
- return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testl %eax, $2048" to "testb %ah, $8".
@@ -2709,8 +2775,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
// target GR8_NOREX registers, so make sure the register class is
// forced.
- return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32,
- Subreg, ShiftedImm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
+ MVT::i32, Subreg, ShiftedImm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testl %eax, $32776" to "testw %ax, $32776".
@@ -2726,7 +2797,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MVT::i16, Reg);
// Emit a testw.
- return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
@@ -2742,7 +2819,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MVT::i32, Reg);
// Emit a testl.
- return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
}
break;
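Note: all four rewrites share one trick plus one piece of bookkeeping. The trick: when the immediate's set bits fit inside a narrower subregister, the wide test can be shrunk (the quoted "testl %eax, $2048" to "testb %ah, $8" case shifts the mask down by 8). The bookkeeping: TEST produces one result where SUB produces two, so the node is replaced by hand via ReplaceUses rather than returned. A sketch of the %ah identity, assuming only ZF is consumed:

    #include <cstdint>

    // For masks whose set bits lie in bits 8..15, the zero flag of the
    // wide and the narrow test agree.
    bool wideZF(uint32_t Eax, uint32_t Imm) { return (Eax & Imm) == 0; }
    bool narrowZF(uint32_t Eax, uint32_t Imm) {
      uint8_t Ah = (uint8_t)(Eax >> 8);          // the %ah subregister
      uint8_t ShiftedImm = (uint8_t)(Imm >> 8);  // $2048 becomes $8
      return (Ah & ShiftedImm) == 0;
    }
    // wideZF(x, 0x800) == narrowZF(x, 0x800) for every x.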
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index bdfe245027..ffaf04cea7 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -522,6 +522,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
}
if (Subtarget->hasCmpxchg16b()) {
@@ -1357,7 +1361,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
if (IsZeroVal &&
- !F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ !F->getFnAttributes().hasNoImplicitFloatAttr()) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
@@ -2048,7 +2052,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
- bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = Fn->getFnAttributes().hasNoImplicitFloatAttr();
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
@@ -2240,7 +2244,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, SR != NotStructReturn,
- MF.getFunction()->hasStructRetAttr(),
+ MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
@@ -2524,7 +2528,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
OpFlags = X86II::MO_DARWIN_STUB;
} else if (Subtarget->isPICStyleRIPRel() &&
isa<Function>(GV) &&
- cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+ cast<Function>(GV)->getFnAttributes().hasNonLazyBindAttr()) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
@@ -2761,6 +2765,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
+ Type *RetTy,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -2772,6 +2777,13 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// If -tailcallopt is specified, make fastcc functions tail-callable.
const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = DAG.getMachineFunction().getFunction();
+
+ // If the function return type is x86_fp80 and the callee return type is not,
+ // then the FP_EXTEND of the call result is not a nop. It's not safe to
+ // perform a tailcall optimization here.
+ if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
+ return false;
+
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
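A concrete case the new check rejects, assuming the common x86 ABI where long double is x86_fp80 (illustration only, not from the commit's tests):

    double f();
    long double g() { return f(); } // FP_EXTEND runs after f() returns

If f() were tail-called, control would never come back to g(), and the double-to-x86_fp80 extension of the result would be skipped — hence no tail call when the caller returns x86_fp80 and the callee does not.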
@@ -6661,7 +6673,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptForSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
@@ -9783,7 +9795,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
- .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ .getFunction()->getFnAttributes().hasNoImplicitFloatAttr()) &&
Subtarget->hasSSE1());
}
@@ -11769,6 +11781,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_SWAP: {
unsigned Opc;
switch (N->getOpcode()) {
@@ -11791,6 +11807,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_LOAD_XOR:
Opc = X86ISD::ATOMXOR64_DAG;
break;
+ case ISD::ATOMIC_LOAD_MAX:
+ Opc = X86ISD::ATOMMAX64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_MIN:
+ Opc = X86ISD::ATOMMIN64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_UMAX:
+ Opc = X86ISD::ATOMUMAX64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_UMIN:
+ Opc = X86ISD::ATOMUMIN64_DAG;
+ break;
case ISD::ATOMIC_SWAP:
Opc = X86ISD::ATOMSWAP64_DAG;
break;
@@ -12182,6 +12210,10 @@ static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
+ case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr;
+ case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr;
+ case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr;
+ case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr;
}
llvm_unreachable("Unhandled atomic-load-op opcode!");
}
@@ -12499,6 +12531,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
SrcHiReg = MI->getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = &X86::GR32RegClass;
+ const TargetRegisterClass *RC8 = &X86::GR8RegClass;
unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
unsigned LOADOpc = X86::MOV32rm;
@@ -12586,6 +12619,55 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
break;
}
+ case X86::ATOMMAX6432:
+ case X86::ATOMMIN6432:
+ case X86::ATOMUMAX6432:
+ case X86::ATOMUMIN6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ unsigned cL = MRI.createVirtualRegister(RC8);
+ unsigned cH = MRI.createVirtualRegister(RC8);
+ unsigned cL32 = MRI.createVirtualRegister(RC);
+ unsigned cH32 = MRI.createVirtualRegister(RC);
+ unsigned cc = MRI.createVirtualRegister(RC);
+ // cl := cmp src_lo, lo
+ BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
+ .addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
+ BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
+ // ch := cmp src_hi, hi
+ BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
+ .addReg(SrcHiReg).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
+ BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
+ // cc := if (src_hi == hi) ? cl : ch;
+ if (Subtarget->hasCMov()) {
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc)
+ .addReg(cH32).addReg(cL32);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc)
+ .addReg(cH32).addReg(cL32)
+ .addImm(X86::COND_E);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
+ if (Subtarget->hasCMov()) {
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L)
+ .addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H)
+ .addReg(SrcHiReg).addReg(HiReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L)
+ .addReg(SrcLoReg).addReg(LoReg)
+ .addImm(X86::COND_NE);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H)
+ .addReg(SrcHiReg).addReg(HiReg)
+ .addImm(X86::COND_NE);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ break;
+ }
case X86::ATOMSWAP6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
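Note: the SETcc/CMOV sequence builds a 64-bit compare out of two 32-bit ones — each half yields a condition byte, and the low half's verdict only counts when the high halves are equal. In C-like terms, the ATOMMAX6432 select reduces to the sketch below (names mirror the virtual registers above; this models the dataflow only, with atomicity supplied by the enclosing cmpxchg8b loop):

    #include <cstdint>

    void max64Select(uint32_t SrcLo, int32_t SrcHi, uint32_t Lo, int32_t Hi,
                     uint32_t &OutLo, int32_t &OutHi) {
      bool cL = (int32_t)SrcLo < (int32_t)Lo; // SETL after cmp src_lo, lo
      bool cH = SrcHi < Hi;                   // SETL after cmp src_hi, hi
      bool cc = (SrcHi == Hi) ? cL : cH;      // CMOVE picks cL on equality
      OutLo = cc ? Lo : SrcLo;                // CMOVNE keeps the current
      OutHi = cc ? Hi : SrcHi;                // value when cc is set
    }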
@@ -13576,6 +13658,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::ATOMNAND6432:
case X86::ATOMADD6432:
case X86::ATOMSUB6432:
+ case X86::ATOMMAX6432:
+ case X86::ATOMMIN6432:
+ case X86::ATOMUMAX6432:
+ case X86::ATOMUMIN6432:
case X86::ATOMSWAP6432:
return EmitAtomicLoadArith6432(MI, BB);
@@ -15562,7 +15648,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = F->getFnAttributes().hasNoImplicitFloatAttr();
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index d3545b0e9f..a53909b7a0 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -355,6 +355,10 @@ namespace llvm {
ATOMXOR64_DAG,
ATOMAND64_DAG,
ATOMNAND64_DAG,