diff options
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/AsmParser/X86AsmParser.cpp | 27 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 4 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 5 | ||||
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 60 | ||||
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 13 | ||||
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86.td | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86CodeEmitter.cpp | 11 | ||||
-rw-r--r-- | lib/Target/X86/X86FloatingPoint.cpp | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 354 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 1255 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 43 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrCompiler.td | 399 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFMA.td | 15 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 22 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.h | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 57 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 490 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrXOP.td | 17 |
19 files changed, 1416 insertions, 1370 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index c89e738a8b..77961e53ae 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -60,6 +60,10 @@ private: bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); + bool mnemonicIsValid(StringRef Mnemonic) { + return mnemonicIsValidImpl(Mnemonic); + } + bool processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); @@ -202,7 +206,8 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } - + /// getLocRange - Get the range between the first and last token of this + /// operand. SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } virtual void print(raw_ostream &OS) const {} @@ -633,14 +638,15 @@ X86Operand *X86AsmParser::ParseOperand() { /// getIntelMemOperandSize - Return intel memory operand size. static unsigned getIntelMemOperandSize(StringRef OpStr) { - unsigned Size = 0; - if (OpStr == "BYTE") Size = 8; - if (OpStr == "WORD") Size = 16; - if (OpStr == "DWORD") Size = 32; - if (OpStr == "QWORD") Size = 64; - if (OpStr == "XWORD") Size = 80; - if (OpStr == "XMMWORD") Size = 128; - if (OpStr == "YMMWORD") Size = 256; + unsigned Size = StringSwitch<unsigned>(OpStr) + .Cases("BYTE", "byte", 8) + .Cases("WORD", "word", 16) + .Cases("DWORD", "dword", 32) + .Cases("QWORD", "qword", 64) + .Cases("XWORD", "xword", 80) + .Cases("XMMWORD", "xmmword", 128) + .Cases("YMMWORD", "ymmword", 256) + .Default(0); return Size; } @@ -743,7 +749,8 @@ X86Operand *X86AsmParser::ParseIntelMemOperand() { unsigned Size = getIntelMemOperandSize(Tok.getString()); if (Size) { Parser.Lex(); - assert (Tok.getString() == "PTR" && "Unexpected token!"); + assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") && + "Unexpected token!"); Parser.Lex(); } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index af444d196e..b24f517209 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -138,6 +138,10 @@ static InstrUID decode(OpcodeType type, if (modFromModRM(modRM) == 0x3) return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; + case MODRM_SPLITMISC: + if (modFromModRM(modRM) == 0x3) + return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; + return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; case MODRM_FULL: return modRMTable[dec->instructionIDs+modRM]; } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index b0a0e1e78e..23dfe4b5b5 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -160,6 +160,10 @@ typedef uint16_t InstrUID; * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode * corresponds to one instruction; otherwise, it corresponds to * a different instruction. + * MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte + * divided by 8 is used to select instruction; otherwise, each + * value of the ModR/M byte could correspond to a different + * instruction. * MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This corresponds to instructions that use reg field as opcode * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond @@ -169,6 +173,7 @@ typedef uint16_t InstrUID; #define MODRMTYPES \ ENUM_ENTRY(MODRM_ONEENTRY) \ ENUM_ENTRY(MODRM_SPLITRM) \ + ENUM_ENTRY(MODRM_SPLITMISC) \ ENUM_ENTRY(MODRM_SPLITREG) \ ENUM_ENTRY(MODRM_FULL) diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index fb1ba12a52..2696190d23 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -67,9 +67,10 @@ public: }; class X86AsmBackend : public MCAsmBackend { + StringRef CPU; public: - X86AsmBackend(const Target &T) - : MCAsmBackend() {} + X86AsmBackend(const Target &T, StringRef _CPU) + : MCAsmBackend(), CPU(_CPU) {} unsigned getNumFixupKinds() const { return X86::NumTargetFixupKinds; @@ -279,9 +280,9 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { Res.setOpcode(RelaxedOp); } -/// writeNopData - Write optimal nops to the output file for the \arg Count +/// writeNopData - Write optimal nops to the output file for the \p Count /// bytes. This returns the number of bytes written. It may return 0 if -/// the \arg Count is more than the maximum optimal nops. +/// the \p Count is more than the maximum optimal nops. bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { static const uint8_t Nops[10][10] = { // nop @@ -306,6 +307,13 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, }; + // This CPU doesnt support long nops. If needed add more. + if (CPU == "geode") { + for (uint64_t i = 0; i < Count; ++i) + OW->Write8(0x90); + return true; + } + // Write an optimal sequence for the first 15 bytes. const uint64_t OptimalCount = (Count < 16) ? Count : 15; const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10; @@ -329,9 +337,9 @@ class ELFX86AsmBackend : public X86AsmBackend { public: uint8_t OSABI; Triple::OSType OSType; // @LOCALMOD: kept OSTYPE vs upstream. FIXME: remove. - ELFX86AsmBackend(const Target &T, uint8_t _OSABI, + ELFX86AsmBackend(const Target &T, uint8_t _OSABI, StringRef CPU, Triple::OSType _OSType) - : X86AsmBackend(T), OSABI(_OSABI), OSType(_OSType) { + : X86AsmBackend(T, CPU), OSABI(_OSABI), OSType(_OSType) { HasReliableSymbolDifference = true; } @@ -358,9 +366,9 @@ public: class ELFX86_32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, + ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU, Triple::OSType OSType) // @LOCALMOD: kept OSType - : ELFX86AsmBackend(T, OSABI, OSType) {} + : ELFX86AsmBackend(T, OSABI, CPU, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86ELFObjectWriter(OS, /*Is64Bit*/ false, OSABI); @@ -369,9 +377,9 @@ public: class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: - ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, + ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU, Triple::OSType OSType) // @LOCALMOD: kept OSType - : ELFX86AsmBackend(T, OSABI, OSType) {} + : ELFX86AsmBackend(T, OSABI, CPU, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86ELFObjectWriter(OS, /*Is64Bit*/ true, OSABI); @@ -382,8 +390,8 @@ class WindowsX86AsmBackend : public X86AsmBackend { bool Is64Bit; public: - WindowsX86AsmBackend(const Target &T, bool is64Bit) - : X86AsmBackend(T) + WindowsX86AsmBackend(const Target &T, bool is64Bit, StringRef CPU) + : X86AsmBackend(T, CPU) , Is64Bit(is64Bit) { } @@ -394,14 +402,14 @@ public: class DarwinX86AsmBackend : public X86AsmBackend { public: - DarwinX86AsmBackend(const Target &T) - : X86AsmBackend(T) { } + DarwinX86AsmBackend(const Target &T, StringRef CPU) + : X86AsmBackend(T, CPU) { } }; class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { public: - DarwinX86_32AsmBackend(const Target &T) - : DarwinX86AsmBackend(T) {} + DarwinX86_32AsmBackend(const Target &T, StringRef CPU) + : DarwinX86AsmBackend(T, CPU) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86MachObjectWriter(OS, /*Is64Bit=*/false, @@ -412,8 +420,8 @@ public: class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { public: - DarwinX86_64AsmBackend(const Target &T) - : DarwinX86AsmBackend(T) { + DarwinX86_64AsmBackend(const Target &T, StringRef CPU) + : DarwinX86AsmBackend(T, CPU) { HasReliableSymbolDifference = true; } @@ -459,28 +467,28 @@ public: } // end anonymous namespace -MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT) { +MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU) { Triple TheTriple(TT); if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) - return new DarwinX86_32AsmBackend(T); + return new DarwinX86_32AsmBackend(T, CPU); if (TheTriple.isOSWindows()) - return new WindowsX86AsmBackend(T, false); + return new WindowsX86AsmBackend(T, false, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); - return new ELFX86_32AsmBackend(T, OSABI, TheTriple.getOS()); + return new ELFX86_32AsmBackend(T, OSABI, CPU, TheTriple.getOS()); } -MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) { +MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU) { Triple TheTriple(TT); if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) - return new DarwinX86_64AsmBackend(T); + return new DarwinX86_64AsmBackend(T, CPU); if (TheTriple.isOSWindows()) - return new WindowsX86AsmBackend(T, true); + return new WindowsX86AsmBackend(T, true, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); - return new ELFX86_64AsmBackend(T, OSABI, TheTriple.getOS()); + return new ELFX86_64AsmBackend(T, OSABI, CPU, TheTriple.getOS()); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 65b2444da7..06138bf335 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -28,8 +28,8 @@ using namespace llvm; namespace { class X86MCCodeEmitter : public MCCodeEmitter { - X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + X86MCCodeEmitter(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCSubtargetInfo &STI; MCContext &Ctx; @@ -560,15 +560,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, } - // Set the vector length to 256-bit if YMM0-YMM15 is used - for (unsigned i = 0; i != MI.getNumOperands(); ++i) { - if (!MI.getOperand(i).isReg()) - continue; - unsigned SrcReg = MI.getOperand(i).getReg(); - if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) - VEX_L = 1; - } - // Classify VEX_B, VEX_4V, VEX_R, VEX_X unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 46500699eb..4b0cacecfa 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -80,8 +80,8 @@ MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT); -MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT); +MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU); +MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU); /// createX86MachObjectWriter - Construct an X86 Mach-O object writer. MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index d078a7b5df..def0f16e96 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -231,6 +231,7 @@ def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureBMI, FeatureFMA]>; +def : Proc<"geode", [Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [Feature3DNow]>; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 8448556720..f89e399658 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -922,17 +922,6 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, } - // Set the vector length to 256-bit if YMM0-YMM15 is used - for (unsigned i = 0; i != MI.getNumOperands(); ++i) { - if (!MI.getOperand(i).isReg()) - continue; - if (MI.getOperand(i).isImplicit()) - continue; - unsigned SrcReg = MI.getOperand(i).getReg(); - if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) - VEX_L = 1; - } - // Classify VEX_B, VEX_4V, VEX_R, VEX_X unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 9d5de814be..791f5982af 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -171,7 +171,7 @@ namespace { // Shuffle live registers to match the expectations of successor blocks. void finishBlockStack(); -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dumpStack() const { dbgs() << "Stack contents:"; for (unsigned i = 0; i != StackTop; ++i) { @@ -577,8 +577,8 @@ namespace { friend bool operator<(const TableEntry &TE, unsigned V) { return TE.from < V; } - friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V, - const TableEntry &TE) { + friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V, + const TableEntry &TE) { return V < TE.from; } }; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 6c0369f70a..b409e88148 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -101,7 +101,7 @@ namespace { Base_Reg = Reg; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump() { dbgs() << "X86ISelAddressMode " << this << '\n'; dbgs() << "Base_Reg "; @@ -192,7 +192,6 @@ namespace { SDNode *Select(SDNode *N); SDNode *SelectGather(SDNode *N, unsigned Opc); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); - SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); @@ -252,13 +251,15 @@ namespace { else if (AM.CP) Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp, AM.SymbolFlags); - else if (AM.ES) + else if (AM.ES) { + assert(!AM.Disp && "Non-zero displacement is ignored with ES."); Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); - else if (AM.JT != -1) + } else if (AM.JT != -1) { + assert(!AM.Disp && "Non-zero displacement is ignored with JT."); Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); - else if (AM.BlockAddr) - Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32, - true, AM.SymbolFlags); + } else if (AM.BlockAddr) + Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp, + AM.SymbolFlags); else Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32); @@ -678,10 +679,16 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { AM.JT = J->getIndex(); AM.SymbolFlags = J->getTargetFlags(); - } else { - AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); - AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags(); - } + } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) { + X86ISelAddressMode Backup = AM; + AM.BlockAddr = BA->getBlockAddress(); + AM.SymbolFlags = BA->getTargetFlags(); + if (FoldOffsetIntoAddress(BA->getOffset(), AM)) { + AM = Backup; + return true; + } + } else + llvm_unreachable("Unhandled symbol reference node."); if (N.getOpcode() == X86ISD::WrapperRIP) AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); @@ -710,10 +717,12 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { AM.JT = J->getIndex(); AM.SymbolFlags = J->getTargetFlags(); - } else { - AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); - AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags(); - } + } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) { + AM.BlockAddr = BA->getBlockAddress(); + AM.Disp += BA->getOffset(); + AM.SymbolFlags = BA->getTargetFlags(); + } else + llvm_unreachable("Unhandled symbol reference node."); return false; } @@ -1683,6 +1692,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue In1 = Node->getOperand(1); SDValue In2L = Node->getOperand(2); SDValue In2H = Node->getOperand(3); + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return NULL; @@ -1696,159 +1706,13 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { return ResNode; } -// FIXME: Figure out some way to unify this with the 'or' and other code -// below. -SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { - if (Node->hasAnyUseOfValue(0)) - return 0; - - // Optimize common patterns for __sync_add_and_fetch and - // __sync_sub_and_fetch where the result is not used. This allows us - // to use "lock" version of add, sub, inc, dec instructions. - // FIXME: Do not use special instructions but instead add the "lock" - // prefix to the target node somehow. The extra information will then be - // transferred to machine instruction and it denotes the prefix. - SDValue Chain = Node->getOperand(0); - SDValue Ptr = Node->getOperand(1); - SDValue Val = Node->getOperand(2); - SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) - return 0; - - bool isInc = false, isDec = false, isSub = false, isCN = false; - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); - if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) { - isCN = true; - int64_t CNVal = CN->getSExtValue(); - if (CNVal == 1) - isInc = true; - else if (CNVal == -1) - isDec = true; - else if (CNVal >= 0) - Val = CurDAG->getTargetConstant(CNVal, NVT); - else { - isSub = true; - Val = CurDAG->getTargetConstant(-CNVal, NVT); - } - } else if (Val.hasOneUse() && - Val.getOpcode() == ISD::SUB && - X86::isZeroNode(Val.getOperand(0))) { - isSub = true; - Val = Val.getOperand(1); - } - - DebugLoc dl = Node->getDebugLoc(); - unsigned Opc = 0; - switch (NVT.getSimpleVT().SimpleTy) { - default: return 0; - case MVT::i8: - if (isInc) - Opc = X86::LOCK_INC8m; - else if (isDec) - Opc = X86::LOCK_DEC8m; - else if (isSub) { - if (isCN) - Opc = X86::LOCK_SUB8mi; - else - Opc = X86::LOCK_SUB8mr; - } else { - if (isCN) - Opc = X86::LOCK_ADD8mi; - else - Opc = X86::LOCK_ADD8mr; - } - break; - case MVT::i16: - if (isInc) - Opc = X86::LOCK_INC16m; - else if (isDec) - Opc = X86::LOCK_DEC16m; - else if (isSub) { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB16mi8; - else - Opc = X86::LOCK_SUB16mi; - } else - Opc = X86::LOCK_SUB16mr; - } else { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD16mi8; - else - Opc = X86::LOCK_ADD16mi; - } else - Opc = X86::LOCK_ADD16mr; - } - break; - case MVT::i32: - if (isInc) - Opc = X86::LOCK_INC32m; - else if (isDec) - Opc = X86::LOCK_DEC32m; - else if (isSub) { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB32mi8; - else - Opc = X86::LOCK_SUB32mi; - } else - Opc = X86::LOCK_SUB32mr; - } else { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD32mi8; - else - Opc = X86::LOCK_ADD32mi; - } else - Opc = X86::LOCK_ADD32mr; - } - break; - case MVT::i64: - if (isInc) - Opc = X86::LOCK_INC64m; - else if (isDec) - Opc = X86::LOCK_DEC64m; - else if (isSub) { - Opc = X86::LOCK_SUB64mr; - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB64mi8; - else if (i64immSExt32(Val.getNode())) - Opc = X86::LOCK_SUB64mi32; - } - } else { - Opc = X86::LOCK_ADD64mr; - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD64mi8; - else if (i64immSExt32(Val.getNode())) - Opc = X86::LOCK_ADD64mi32; - } - } - break; - } - - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, NVT), 0); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); - if (isInc || isDec) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0); - cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); - SDValue RetVals[] = { Undef, Ret }; - return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); - } else { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); - cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); - SDValue RetVals[] = { Undef, Ret }; - return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); - } -} - +/// Atomic opcode table +/// enum AtomicOpc { + ADD, + SUB, + INC, + DEC, OR, AND, XOR, @@ -1872,6 +1736,58 @@ enum AtomicSz { static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { { + X86::LOCK_ADD8mi, + X86::LOCK_ADD8mr, + X86::LOCK_ADD16mi8, + X86::LOCK_ADD16mi, + X86::LOCK_ADD16mr, + X86::LOCK_ADD32mi8, + X86::LOCK_ADD32mi, + X86::LOCK_ADD32mr, + X86::LOCK_ADD64mi8, + X86::LOCK_ADD64mi32, + X86::LOCK_ADD64mr, + }, + { + X86::LOCK_SUB8mi, + X86::LOCK_SUB8mr, + X86::LOCK_SUB16mi8, + X86::LOCK_SUB16mi, + X86::LOCK_SUB16mr, + X86::LOCK_SUB32mi8, + X86::LOCK_SUB32mi, + X86::LOCK_SUB32mr, + X86::LOCK_SUB64mi8, + X86::LOCK_SUB64mi32, + X86::LOCK_SUB64mr, + }, + { + 0, + X86::LOCK_INC8m, + 0, + 0, + X86::LOCK_INC16m, + 0, + 0, + X86::LOCK_INC32m, + 0, + 0, + X86::LOCK_INC64m, + }, + { + 0, + X86::LOCK_DEC8m, + 0, + 0, + X86::LOCK_DEC16m, + 0, + 0, + X86::LOCK_DEC32m, + 0, + 0, + X86::LOCK_DEC64m, + }, + { X86::LOCK_OR8mi, X86::LOCK_OR8mr, X86::LOCK_OR16mi8, @@ -1882,7 +1798,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_OR32mr, X86::LOCK_OR64mi8, X86::LOCK_OR64mi32, - X86::LOCK_OR64mr + X86::LOCK_OR64mr, }, { X86::LOCK_AND8mi, @@ -1895,7 +1811,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_AND32mr, X86::LOCK_AND64mi8, X86::LOCK_AND64mi32, - X86::LOCK_AND64mr + X86::LOCK_AND64mr, }, { X86::LOCK_XOR8mi, @@ -1908,18 +1824,74 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_XOR32mr, X86::LOCK_XOR64mi8, X86::LOCK_XOR64mi32, - X86::LOCK_XOR64mr + X86::LOCK_XOR64mr, } }; +// Return the target constant operand for atomic-load-op and do simple +// translations, such as from atomic-load-add to lock-sub. The return value is +// one of the following 3 cases: +// + target-constant, the operand could be supported as a target constant. +// + empty, the operand is not needed any more with the new op selected. +// + non-empty, otherwise. +static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, + DebugLoc dl, + enum AtomicOpc &Op, EVT NVT, + SDValue Val) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) { + int64_t CNVal = CN->getSExtValue(); + // Quit if not 32-bit imm. + if ((int32_t)CNVal != CNVal) + return Val; + // For atomic-load-add, we could do some optimizations. + if (Op == ADD) { + // Translate to INC/DEC if ADD by 1 or -1. + if ((CNVal == 1) || (CNVal == -1)) { + Op = (CNVal == 1) ? INC : DEC; + // No more constant operand after being translated into INC/DEC. + return SDValue(); + } + // Translate to SUB if ADD by negative value. + if (CNVal < 0) { + Op = SUB; + CNVal = -CNVal; + } + } + return CurDAG->getTargetConstant(CNVal, NVT); + } + + // If the value operand is single-used, try to optimize it. + if (Op == ADD && Val.hasOneUse()) { + // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x). + if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) { + Op = SUB; + return Val.getOperand(1); + } + // A special case for i16, which needs truncating as, in most cases, it's + // promoted to i32. We will translate + // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x)) + if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 && + Val.getOperand(0).getOpcode() == ISD::SUB && + X86::isZeroNode(Val.getOperand(0).getOperand(0))) { + Op = SUB; + Val = Val.getOperand(0); + return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT, + Val.getOperand(1)); + } + } + + return Val; +} + SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { if (Node->hasAnyUseOfValue(0)) return 0; + DebugLoc dl = Node->getDebugLoc(); + // Optimize common patterns for __sync_or_and_fetch and similar arith // operations where the result is not used. This allows us to use the "lock" // version of the arithmetic instruction. - // FIXME: Same as for 'add' and 'sub', try to merge those down here. SDValue Chain = Node->getOperand(0); SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); @@ -1930,6 +1902,8 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { // Which index into the table. enum AtomicOpc Op; switch (Node->getOpcode()) { + default: + return 0; case ISD::ATOMIC_LOAD_OR: Op = OR; break; @@ -1939,16 +1913,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { case ISD::ATOMIC_LOAD_XOR: Op = XOR; break; - default: - return 0; - } - - bool isCN = false; - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); - if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) { - isCN = true; - Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT); + case ISD::ATOMIC_LOAD_ADD: + Op = ADD; + break; } + + Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); + bool isUnOp = !Val.getNode(); + bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); unsigned Opc = 0; switch (NVT.getSimpleVT().SimpleTy) { @@ -1990,13 +1962,20 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { assert(Opc != 0 && "Invalid arith lock transform!"); - DebugLoc dl = Node->getDebugLoc(); + SDValue Ret; SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); + if (isUnOp) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, + array_lengthof(Ops)), 0); + } else { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, + array_lengthof(Ops)), 0); + } cast<Machine |