author     Derek Schuff <dschuff@chromium.org>    2012-08-17 14:35:45 -0700
committer  Derek Schuff <dschuff@chromium.org>    2012-08-17 14:35:45 -0700
commit     b62e9abf7dd9e39c95327914ce9dfe216386824a (patch)
tree       c683f0bcbef19f622727251165eaf89a4f806c62 /lib/Target/X86
parent     66f65db9406ca9e59d4bfed89436f668d6a84374 (diff)
parent     c723eb1aef817d47feec620933ee1ec6005cdd14 (diff)
Merge commit 'c723eb1aef817d47feec620933ee1ec6005cdd14'
This merges r159618 from upstream into master. It goes with clang rev
af50aab0c317462129d73ae8000c6394c718598d
Conflicts:
	include/llvm/CodeGen/LexicalScopes.h
	include/llvm/Target/TargetOptions.h
	lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
	lib/Target/ARM/ARMBaseInstrInfo.cpp
	lib/Target/ARM/ARMTargetMachine.cpp
	lib/Target/ARM/ARMTargetObjectFile.cpp
	lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
	lib/Target/Mips/MipsISelDAGToDAG.cpp
	lib/Target/Mips/MipsInstrFPU.td
	lib/Target/Mips/MipsMCInstLower.cpp
	lib/Target/Mips/MipsTargetMachine.cpp
	lib/Target/TargetMachine.cpp
	lib/Target/X86/X86ISelLowering.cpp
	lib/Target/X86/X86RegisterInfo.cpp
	lib/Target/X86/X86TargetObjectFile.cpp
	lib/Target/X86/X86TargetObjectFile.h
	tools/llc/llc.cpp
(tools/llc/llc.cpp is from a merged version of r160532 because it was a bit
hairy and I didn't want to redo it.)
Diffstat (limited to 'lib/Target/X86')
22 files changed, 655 insertions, 519 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 08c732c388..417842b467 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -187,7 +187,7 @@ struct X86Operand : public MCParsedAsmOperand {
   SMLoc getStartLoc() const { return StartLoc; }
   /// getEndLoc - Get the location of the last token of this operand.
   SMLoc getEndLoc() const { return EndLoc; }
-
+  SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
 
   virtual void print(raw_ostream &OS) const {}
@@ -309,25 +309,25 @@ struct X86Operand : public MCParsedAsmOperand {
   }
 
   bool isMem() const { return Kind == Memory; }
-  bool isMem8() const { 
+  bool isMem8() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 8);
   }
-  bool isMem16() const { 
+  bool isMem16() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 16);
   }
-  bool isMem32() const { 
+  bool isMem32() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 32);
   }
-  bool isMem64() const { 
+  bool isMem64() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 64);
   }
-  bool isMem80() const { 
+  bool isMem80() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 80);
   }
-  bool isMem128() const { 
+  bool isMem128() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 128);
   }
-  bool isMem256() const { 
+  bool isMem256() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 256);
   }
@@ -356,26 +356,26 @@ struct X86Operand : public MCParsedAsmOperand {
     addExpr(Inst, getImm());
   }
 
-  void addMem8Operands(MCInst &Inst, unsigned N) const { 
-    addMemOperands(Inst, N); 
+  void addMem8Operands(MCInst &Inst, unsigned N) const {
+    addMemOperands(Inst, N);
   }
-  void addMem16Operands(MCInst &Inst, unsigned N) const { 
-    addMemOperands(Inst, N); 
+  void addMem16Operands(MCInst &Inst, unsigned N) const {
+    addMemOperands(Inst, N);
   }
-  void addMem32Operands(MCInst &Inst, unsigned N) const { 
-    addMemOperands(Inst, N); 
+  void addMem32Operands(MCInst &Inst, unsigned N) const {
+    addMemOperands(Inst, N);
   }
-  void addMem64Operands(MCInst &Inst, unsigned N) const { 
-    addMemOperands(Inst, N); 
+  void addMem64Operands(MCInst &Inst, unsigned N) const {
+    addMemOperands(Inst, N);
   }
-  void addMem80Operands(MCInst &Inst, unsigned N) const { 
-    addMemOperands(Inst, N); 
+  void addMem80Operands(MCInst &Inst, unsigned N) const {
+    addMemOperands(Inst, N);
   }
-  void addMem128Operands(MCInst &Inst, unsigned N) const { 
-    addMemOperands(Inst, N); 
+  void addMem128Operands(MCInst &Inst, unsigned N) const {
+    addMemOperands(Inst, N);
   }
-  void addMem256Operands(MCInst &Inst, unsigned N) const { 
-    addMemOperands(Inst, N); 
+  void addMem256Operands(MCInst &Inst, unsigned N) const {
+    addMemOperands(Inst, N);
   }
 
   void addMemOperands(MCInst &Inst, unsigned N) const {
@@ -467,7 +467,7 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) {
 bool X86AsmParser::isDstOp(X86Operand &Op) {
   unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
 
-  return Op.isMem() && 
+  return Op.isMem() &&
     (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
     isa<MCConstantExpr>(Op.Mem.Disp) &&
     cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
@@ -611,7 +611,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
   if (getLexer().isNot(AsmToken::LBrac))
     return ErrorOperand(Start, "Expected '[' token!");
   Parser.Lex();
-  
+
   if (getLexer().is(AsmToken::Identifier)) {
     // Parse BaseReg
     if (ParseRegister(BaseReg, Start, End)) {
@@ -668,7 +668,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
       End = Parser.getTok().getLoc();
       if (!IndexReg)
         ParseRegister(IndexReg, Start, End);
-      else if (getParser().ParseExpression(Disp, End)) return 0; 
+      else if (getParser().ParseExpression(Disp, End)) return 0;
     }
   }
@@ -916,15 +916,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 
   // If we have both a base register and an index register make sure they are
   // both 64-bit or 32-bit registers.
+  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
   if (BaseReg != 0 && IndexReg != 0) {
     if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
-        !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) &&
+        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
         IndexReg != X86::RIZ) {
       Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
       return 0;
     }
     if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
-        !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) &&
+        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
         IndexReg != X86::EIZ){
       Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
       return 0;
@@ -944,7 +947,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
   if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
       PatchedName != "setb" && PatchedName != "setnb")
     PatchedName = PatchedName.substr(0, Name.size()-1);
-  
+
   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
   const MCExpr *ExtraImmOp = 0;
   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
@@ -1217,7 +1220,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
       }
     }
   }
-  
+
   // Transforms "int $3" into "int3" as a size optimization.  We can't write an
   // instalias with an immediate operand yet.
   if (Name == "int" && Operands.size() == 2) {
@@ -1520,7 +1523,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
   case Match_Success:
     // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
-    // individual transformations can chain off each other. 
+    // individual transformations can chain off each other.
     while (processInstruction(Inst, Operands))
       ;
@@ -1558,12 +1561,12 @@ MatchAndEmitInstruction(SMLoc IDLoc,
   // Otherwise, we assume that this may be an integer instruction, which comes
   // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
   const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
-  
+
   // Check for the various suffix matches.
   Tmp[Base.size()] = Suffixes[0];
   unsigned ErrorInfoIgnore;
   unsigned Match1, Match2, Match3, Match4;
-  
+
   Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
   Tmp[Base.size()] = Suffixes[1];
   Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
@@ -1691,19 +1694,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
       const MCExpr *Value;
       if (getParser().ParseExpression(Value))
         return true;
-      
+
       getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
-      
+
       if (getLexer().is(AsmToken::EndOfStatement))
         break;
-      
+
       // FIXME: Improve diagnostic.
      if (getLexer().isNot(AsmToken::Comma))
         return Error(L, "unexpected token in directive");
       Parser.Lex();
     }
   }
-  
+
   Parser.Lex();
   return false;
 }
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 5b402da3ad..45fd42f205 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -53,6 +53,8 @@ endif()
 
 add_llvm_target(X86CodeGen ${sources})
 
+add_dependencies(LLVMX86CodeGen intrinsics_gen)
+
 add_subdirectory(AsmParser)
 add_subdirectory(Disassembler)
 add_subdirectory(InstPrinter)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index b13a00620b..d58e36c803 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -498,7 +498,38 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
     } else {
       baseReg = MCOperand::CreateReg(0);
     }
-    
+
+    // Check whether we are handling VSIB addressing mode for GATHER.
+    // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
+    // we should use SIB_INDEX_XMM4|YMM4 for VSIB.
+    // I don't see a way to get the correct IndexReg in readSIB:
+    //   We can tell whether it is VSIB or SIB after instruction ID is decoded,
+    //   but instruction ID may not be decoded yet when calling readSIB.
+    uint32_t Opcode = mcInst.getOpcode();
+    bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
+                       Opcode == X86::VGATHERDPDYrm ||
+                       Opcode == X86::VGATHERQPDrm ||
+                       Opcode == X86::VGATHERDPSrm ||
+                       Opcode == X86::VGATHERQPSrm ||
+                       Opcode == X86::VPGATHERDQrm ||
+                       Opcode == X86::VPGATHERDQYrm ||
+                       Opcode == X86::VPGATHERQQrm ||
+                       Opcode == X86::VPGATHERDDrm ||
+                       Opcode == X86::VPGATHERQDrm);
+    bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
+                       Opcode == X86::VGATHERDPSYrm ||
+                       Opcode == X86::VGATHERQPSYrm ||
+                       Opcode == X86::VPGATHERQQYrm ||
+                       Opcode == X86::VPGATHERDDYrm ||
+                       Opcode == X86::VPGATHERQDYrm);
+    if (IndexIs128 || IndexIs256) {
+      unsigned IndexOffset = insn.sibIndex -
+        (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
+      SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
+      insn.sibIndex = (SIBIndex)(IndexBase +
+        (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
+    }
+
     if (insn.sibIndex != SIB_INDEX_NONE) {
       switch (insn.sibIndex) {
       default:
@@ -509,6 +540,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
         indexReg = MCOperand::CreateReg(X86::x); break;
       EA_BASES_32BIT
       EA_BASES_64BIT
+      REGS_XMM
+      REGS_YMM
 #undef ENTRY
       }
     } else {
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index fae309b45d..e2caf6a2a8 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -310,11 +310,14 @@ typedef enum {
  * SIBIndex - All possible values of the SIB index field.
  *   Borrows entries from ALL_EA_BASES with the special case that
  *   sib is synonymous with NONE.
+ * Vector SIB: index can be XMM or YMM.
  */
 typedef enum {
   SIB_INDEX_NONE,
 #define ENTRY(x) SIB_INDEX_##x,
   ALL_EA_BASES
+  REGS_XMM
+  REGS_YMM
 #undef ENTRY
   SIB_INDEX_max
 } SIBIndex;
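For readers unfamiliar with VSIB: the assembler and disassembler changes above exist so that gather memory operands, whose SIB index field names a vector register rather than a general-purpose one, round-trip correctly. The following user-level sketch is not part of the patch; it assumes a compiler with AVX2 support (-mavx2) and uses the standard <immintrin.h> intrinsics.

// Not part of the patch: a minimal illustration of the VSIB addressing
// mode that the VGATHER* support above decodes. In a gather, the index
// slot of the memory operand is an xmm/ymm register, one lane per element.
#include <immintrin.h>

__m256d gather_four(const double *base, __m128i idx) {
  // Emits something like: vgatherdpd %ymm2, (%rdi,%xmm0,8), %ymm1
  // -- note the xmm register where a GPR index would normally appear.
  return _mm256_i32gather_pd(base, idx, /*scale=*/8);
}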
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index c3f46ebda0..b0e5be3162 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -483,17 +483,17 @@ namespace X86II {
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
   // specified machine instruction.
   //
-  static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+  inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
     return TSFlags >> X86II::OpcodeShift;
   }
 
-  static inline bool hasImm(uint64_t TSFlags) {
+  inline bool hasImm(uint64_t TSFlags) {
     return (TSFlags & X86II::ImmMask) != 0;
   }
 
   /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
   /// of the specified instruction.
-  static inline unsigned getSizeOfImm(uint64_t TSFlags) {
+  inline unsigned getSizeOfImm(uint64_t TSFlags) {
     switch (TSFlags & X86II::ImmMask) {
     default: llvm_unreachable("Unknown immediate size");
     case X86II::Imm8:
@@ -508,7 +508,7 @@ namespace X86II {
 
   /// isImmPCRel - Return true if the immediate of the specified instruction's
   /// TSFlags indicates that it is pc relative.
-  static inline unsigned isImmPCRel(uint64_t TSFlags) {
+  inline unsigned isImmPCRel(uint64_t TSFlags) {
     switch (TSFlags & X86II::ImmMask) {
     default: llvm_unreachable("Unknown immediate size");
     case X86II::Imm8PCRel:
@@ -531,7 +531,7 @@ namespace X86II {
   /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
   /// counted as one operand.
   ///
-  static inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
+  inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
     switch (TSFlags & X86II::FormMask) {
     case X86II::MRMInitReg:
       // FIXME: Remove this form.
@@ -594,7 +594,7 @@ namespace X86II {
 
   /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
   /// higher) register?  e.g. r8, xmm8, xmm13, etc.
-  static inline bool isX86_64ExtendedReg(unsigned RegNo) {
+  inline bool isX86_64ExtendedReg(unsigned RegNo) {
     switch (RegNo) {
     default: break;
     case X86::R8:    case X86::R9:    case X86::R10:   case X86::R11:
@@ -616,7 +616,7 @@ namespace X86II {
     return false;
   }
 
-  static inline bool isX86_64NonExtLowByteReg(unsigned reg) {
+  inline bool isX86_64NonExtLowByteReg(unsigned reg) {
     return (reg == X86::SPL || reg == X86::BPL ||
             reg == X86::SIL || reg == X86::DIL);
   }
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 61e2fdcb62..7f7873acd1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -621,7 +621,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
       VEX_X = 0x0;
 
     if (HasVEX_4VOp3)
-      VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
+      // Instruction format for 4VOp3:
+      //   src1(ModR/M), MemAddr, src3(VEX_4V)
+      // CurOp points to start of the MemoryOperand,
+      // it skips TIED_TO operands if exist, then increments past src1.
+      // CurOp + X86::AddrNumOperands will point to src3.
+      VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);
     break;
   case X86II::MRM0m: case X86II::MRM1m:
   case X86II::MRM2m: case X86II::MRM3m:
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index ee66e7ce1c..599c8f8c6d 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -20,10 +20,10 @@
 #include "X86TargetMachine.h"
 #include "InstPrinter/X86ATTInstPrinter.h"
 #include "llvm/CallingConv.h"
+#include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
 #include "llvm/Type.h"
-#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -436,7 +436,9 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
   const MachineOperand &MO = MI->getOperand(OpNo);
 
   switch (ExtraCode[0]) {
-    default: return true;  // Unknown modifier.
+    default:
+      // See if this is a generic print operand
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
     case 'a': // This is an address.  Currently only 'i' and 'r' are expected.
       if (MO.isImm()) {
         O << MO.getImm();
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index af9efbd906..e263e44f40 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -52,7 +52,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
           MFI->hasVarSizedObjects() ||
           MFI->isFrameAddressTaken() ||
           MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
-          MMI.callsUnwindInit());
+          MMI.callsUnwindInit() || MMI.callsEHReturn());
 }
 
 static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
@@ -652,7 +652,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
   unsigned SlotSize = RegInfo->getSlotSize();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
   unsigned StackPtr = RegInfo->getStackRegister();
-  unsigned BasePtr = RegInfo->getBaseRegister();
   DebugLoc DL;
 
   // If we're forcing a stack realignment we can't rely on just the frame
@@ -916,18 +915,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
     emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                  UseLEA, TII, *RegInfo);
 
-  // If we need a base pointer, set it up here. It's whatever the value
-  // of the stack pointer is at this point. Any variable size objects
-  // will be allocated after this, so we can still use the base pointer
-  // to reference locals.
-  if (RegInfo->hasBasePointer(MF)) {
-    // Update the frame pointer with the current stack pointer.
-    unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
-    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
-      .addReg(StackPtr)
-      .setMIFlag(MachineInstr::FrameSetup);
-  }
-
   if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
     // Mark end of stack pointer adjustment.
     MCSymbol *Label = MMI.getContext().CreateTempSymbol();
@@ -1184,16 +1171,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con
   int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
   uint64_t StackSize = MFI->getStackSize();
 
-  if (RegInfo->hasBasePointer(MF)) {
-    assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
-    if (FI < 0) {
-      // Skip the saved EBP.
-      return Offset + RegInfo->getSlotSize();
-    } else {
-      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
-      return Offset + StackSize;
-    }
-  } else if (RegInfo->needsStackRealignment(MF)) {
+  if (RegInfo->needsStackRealignment(MF)) {
     if (FI < 0) {
       // Skip the saved EBP.
       return Offset + RegInfo->getSlotSize();
@@ -1224,14 +1202,9 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
   const X86RegisterInfo *RegInfo =
     static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
   // We can't calculate offset from frame pointer if the stack is realigned,
-  // so enforce usage of stack/base pointer. The base pointer is used when we
-  // have dynamic allocas in addition to dynamic realignment.
-  if (RegInfo->hasBasePointer(MF))
-    FrameReg = RegInfo->getBaseRegister();
-  else if (RegInfo->needsStackRealignment(MF))
-    FrameReg = RegInfo->getStackRegister();
-  else
-    FrameReg = RegInfo->getFrameRegister(MF);
+  // so enforce usage of stack pointer.
+  FrameReg = (RegInfo->needsStackRealignment(MF)) ?
+    RegInfo->getStackRegister() : RegInfo->getFrameRegister(MF);
   return getFrameIndexOffset(MF, FI);
 }
@@ -1368,10 +1341,6 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
            "Slot for EBP register must be last in order to be found!");
     (void)FrameIdx;
   }
-
-  // Spill the BasePtr if it's used.
-  if (RegInfo->hasBasePointer(MF))
-    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
 }
 
 static bool
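The frame-lowering hunks above revert the separate base-pointer path, so realigned frames go back to addressing locals off the (realigned) stack pointer. As a hedged aside, not from the patch, the following hypothetical function shows the kind of code that triggers dynamic stack realignment in the first place:

// Not from the patch: an over-aligned local forces dynamic realignment
// (e.g. `andq $-64, %rsp` in the prologue), after which frame-pointer
// offsets to locals are no longer fixed. That is the case the modified
// getFrameIndexReference now resolves to the stack pointer.
void consume(char *p);

void realigned_frame() {
  alignas(64) char buf[256];  // stricter than the 16-byte x86-64 ABI alignment
  consume(buf);
}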
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index c8ff1cf0d0..2871a790c6 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -188,6 +188,7 @@ namespace {
 
   private:
     SDNode *Select(SDNode *N);
+    SDNode *SelectGather(SDNode *N, unsigned Opc);
     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
     SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
     SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
@@ -2165,6 +2166,30 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
   llvm_unreachable("unrecognized size for LdVT");
 }
 
+/// SelectGather - Customized ISel for GATHER operations.
+///
+SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
+  // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
+  SDValue Chain = Node->getOperand(0);
+  SDValue VSrc = Node->getOperand(2);
+  SDValue Base = Node->getOperand(3);
+  SDValue VIdx = Node->getOperand(4);
+  SDValue VMask = Node->getOperand(5);
+  ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
+  if (!Scale)
+    return 0;
+
+  // Memory Operands: Base, Scale, Index, Disp, Segment
+  SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
+  SDValue Segment = CurDAG->getRegister(0, MVT::i32);
+  const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
+                          Disp, Segment, VMask, Chain};
+  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+                                           VSrc.getValueType(), MVT::Other,
+                                           Ops, array_lengthof(Ops));
+  return ResNode;
+}
+
 SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
   EVT NVT = Node->getValueType(0);
   unsigned Opc, MOpc;
@@ -2180,23 +2205,81 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
 
   switch (Opcode) {
   default: break;
+  case ISD::INTRINSIC_W_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+    switch (IntNo) {
+    default: break;
+    case Intrinsic::x86_avx2_gather_d_pd:
+    case Intrinsic::x86_avx2_gather_d_pd_256:
+    case Intrinsic::x86_avx2_gather_q_pd:
+    case Intrinsic::x86_avx2_gather_q_pd_256:
+    case Intrinsic::x86_avx2_gather_d_ps:
+    case Intrinsic::x86_avx2_gather_d_ps_256:
+    case Intrinsic::x86_avx2_gather_q_ps:
+    case Intrinsic::x86_avx2_gather_q_ps_256:
+    case Intrinsic::x86_avx2_gather_d_q:
+    case Intrinsic::x86_avx2_gather_d_q_256:
+    case Intrinsic::x86_avx2_gather_q_q:
+    case Intrinsic::x86_avx2_gather_q_q_256:
+    case Intrinsic::x86_avx2_gather_d_d:
+    case Intrinsic::x86_avx2_gather_d_d_256:
+    case Intrinsic::x86_avx2_gather_q_d:
+    case Intrinsic::x86_avx2_gather_q_d_256: {
+      unsigned Opc;
+      switch (IntNo) {
+      default: llvm_unreachable("Impossible intrinsic");
+      case Intrinsic::x86_avx2_gather_d_pd:     Opc = X86::VGATHERDPDrm;  break;
+      case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
+      case Intrinsic::x86_avx2_gather_q_pd:     Opc = X86::VGATHERQPDrm;  break;
+      case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
+      case Intrinsic::x86_avx2_gather_d_ps:     Opc = X86::VGATHERDPSrm;  break;
+      case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
+      case Intrinsic::x86_avx2_gather_q_ps:     Opc = X86::VGATHERQPSrm;  break;
+      case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
+      case Intrinsic::x86_avx2_gather_d_q:      Opc = X86::VPGATHERDQrm;  break;
+      case Intrinsic::x86_avx2_gather_d_q_256:  Opc = X86::VPGATHERDQYrm; break;
+      case Intrinsic::x86_avx2_gather_q_q:      Opc = X86::VPGATHERQQrm;  break;
+      case Intrinsic::x86_avx2_gather_q_q_256:  Opc = X86::VPGATHERQQYrm; break;
+      case Intrinsic::x86_avx2_gather_d_d:      Opc = X86::VPGATHERDDrm;  break;
+      case Intrinsic::x86_avx2_gather_d_d_256:  Opc = X86::VPGATHERDDYrm; break;
+      case Intrinsic::x86_avx2_gather_q_d:      Opc = X86::VPGATHERQDrm;  break;
+      case Intrinsic::x86_avx2_gather_q_d_256:  Opc = X86::VPGATHERQDYrm; break;
+      }
+      SDNode *RetVal = SelectGather(Node, Opc);
+      if (RetVal)
+        return RetVal;
+      break;
+    }
+    }
+    break;
+  }
   case X86ISD::GlobalBaseReg:
     return getGlobalBaseReg();
 
+
   case X86ISD::ATOMOR64_DAG:
-    return SelectAtomic64(Node, X86::ATOMOR6432);
   case X86ISD::ATOMXOR64_DAG:
-    return SelectAtomic64(Node, X86::ATOMXOR6432);
   case X86ISD::ATOMADD64_DAG:
-    return SelectAtomic64(Node, X86::ATOMADD6432);
   case X86ISD::ATOMSUB64_DAG:
-    return SelectAtomic64(Node, X86::ATOMSUB6432);
   case X86ISD::ATOMNAND64_DAG:
-    return SelectAtomic64(Node, X86::ATOMNAND6432);
   case X86ISD::ATOMAND64_DAG:
-    return SelectAtomic64(Node, X86::ATOMAND6432);
-  case X86ISD::ATOMSWAP64_DAG:
-    return SelectAtomic64(Node, X86::ATOMSWAP6432);
+  case X86ISD::ATOMSWAP64_DAG: {
+    unsigned Opc;
+    switch (Opcode) {
+    default: llvm_unreachable("Impossible intrinsic");
+    case X86ISD::ATOMOR64_DAG:   Opc = X86::ATOMOR6432;   break;
+    case X86ISD::ATOMXOR64_DAG:  Opc = X86::ATOMXOR6432;  break;
+    case X86ISD::ATOMADD64_DAG:  Opc = X86::ATOMADD6432;  break;
+    case X86ISD::ATOMSUB64_DAG:  Opc = X86::ATOMSUB6432;  break;
+    case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
+    case X86ISD::ATOMAND64_DAG:  Opc = X86::ATOMAND6432;  break;
+    case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
+    }
+    SDNode *RetVal = SelectAtomic64(Node, Opc);
+    if (RetVal)
+      return RetVal;
+    break;
+  }
 
   case ISD::ATOMIC_LOAD_ADD: {
     SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index bf559c98dd..4197c35adb 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -99,6 +99,10 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
 static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
                                   unsigned IdxVal, SelectionDAG &DAG,
                                   DebugLoc dl) {
+  // Inserting UNDEF is Result
+  if (Vec.getOpcode() == ISD::UNDEF)
+    return Result;
+
   EVT VT = Vec.getValueType();
   assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
 
@@ -114,9 +118,8 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
                                * ElemsPerChunk);
 
   SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
-  Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
-                       VecIdx);
-  return Result;
+  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+                     VecIdx);
 }
 
 /// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
@@ -136,10 +139,13 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
 
   if (Subtarget->isTargetEnvMacho()) {
     if (is64Bit)
-      return new X8664_MachoTargetObjectFile();
+      return new X86_64MachoTargetObjectFile();
     return new TargetLoweringObjectFileMachO();
   }
 
+  if (Subtarget->isTargetLinux())
+    return new X86LinuxTargetObjectFile();
+
   // @LOCALMOD-BEGIN
   if (Subtarget->isTargetNaCl())
     return new TargetLoweringObjectFileNaCl();
@@ -3536,6 +3542,52 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
   return true;
 }
 
+//
+// Some special combinations that can be optimized.
+//
+static
+SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
+                               SelectionDAG &DAG) {
+  EVT VT = SVOp->getValueType(0);
+  DebugLoc dl = SVOp->getDebugLoc();
+
+  if (VT != MVT::v8i32 && VT != MVT::v8f32)
+    return SDValue();
+
+  ArrayRef<int> Mask = SVOp->getMask();
+
+  // These are the special masks that may be optimized.
+  static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
+  static const int MaskToOptimizeOdd[]  = {1, 9, 3, 11, 5, 13, 7, 15};
+  bool MatchEvenMask = true;
+  bool MatchOddMask  = true;
+  for (int i=0; i<8; ++i) {
+    if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
+      MatchEvenMask = false;
+    if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
+      MatchOddMask = false;
+  }
+  static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1};
+  static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1};
+
+  const int *CompactionMask;
+  if (MatchEvenMask)
+    CompactionMask = CompactionMaskEven;
+  else if (MatchOddMask)
+    CompactionMask = CompactionMaskOdd;
+  else
+    return SDValue();
+
+  SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
+
+  SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0),
+                                     UndefNode, CompactionMask);
+  SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1),
+                                     UndefNode, CompactionMask);
+  static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13};
+  return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask);
+}
+
 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
 static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
@@ -5041,8 +5093,16 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
       SDValue Sc = Op.getOperand(0);
 
       if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
-          Sc.getOpcode() != ISD::BUILD_VECTOR)
-        return SDValue();
+          Sc.getOpcode() != ISD::BUILD_VECTOR) {
+
+        if (!Subtarget->hasAVX2())
+          return SDValue();
+
+        // Use the register form of the broadcast instruction available on AVX2.
+        if (VT.is256BitVector())
+          Sc = Extract128BitVector(Sc, 0, DAG, dl);
+        return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
+      }
 
       Ld = Sc.getOperand(0);
       ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
@@ -6022,6 +6082,11 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
 /// which could not be matched by any known target speficic shuffle
 static SDValue LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp,
                                        SelectionDAG &DAG) {
+
+  SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG);
+  if (NewOp.getNode())
+    return NewOp;
+
   EVT VT = SVOp->getValueType(0);
   unsigned NumElems = VT.getVectorNumElements();
 
@@ -7504,11 +7569,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
   const GlobalValue *GV = GA->getGlobal();
 
   if (Subtarget->isTargetELF()) {
-    // If GV is an alias then use the aliasee for determining
-    // thread-localness.
-    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
-      GV = GA->resolveAliasedGlobal(false);
-
     TLSModel::Model model = getTargetMachine().getTLSModel(GV);
 
     // @LOCALMOD-START
@@ -9995,7 +10055,6 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
 }
 
 SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
-  MachineFunction &MF = DAG.getMachineFunction();
   SDValue Chain     = Op.getOperand(0);
   SDValue Offset    = Op.getOperand(1);
   SDValue Handler   = Op.getOperand(2);
@@ -10015,7 +10074,6 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
   Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
                        false, false, 0);
   Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
-  MF.getRegInfo().addLiveOut(StoreAddrReg);
 
   return DAG.getNode(X86ISD::EH_RETURN, dl,
                      MVT::Other,
@@ -16240,12 +16298,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
     // wrong class.  This can happen with constraints like {xmm0} where the
     // target independent register mapper will just pick the first match it can
     // find, ignoring the required type.
-    if (VT == MVT::f32)
+
+    if (VT == MVT::f32 || VT == MVT::i32)
       Res.second = &X86::FR32RegClass;
-    else if (VT == MVT::f64)
+    else if (VT == MVT::f64 || VT == MVT::i64)
       Res.second = &X86::FR64RegClass;
     else if (X86::VR128RegClass.hasType(VT))
       Res.second = &X86::VR128RegClass;
+    else if (X86::VR256RegClass.hasType(VT))
      Res.second = &X86::VR256RegClass;
   }
 
   return Res;
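The shuffle-compaction logic added above hinges on matching a mask against the even or odd interleave pattern while treating undef lanes as wildcards. The following standalone sketch, not part of the patch, restates that test in plain C++ so the matching rule is easy to verify in isolation:

// Not part of the patch: a self-contained version of the mask test in
// Compact8x32ShuffleNode. A mask entry of -1 means "undef" and matches
// anything; {0,8,2,10,4,12,6,14} interleaves the even lanes of two 8x32
// vectors, which the lowering rewrites as two in-lane compactions plus
// one unpack.
#include <array>

static bool matchesWithUndef(const std::array<int, 8> &Mask,
                             const std::array<int, 8> &Want) {
  for (int i = 0; i < 8; ++i)
    if (Mask[i] != -1 && Mask[i] != Want[i])
      return false;  // a defined lane disagrees with the pattern
  return true;
}

static bool isEvenInterleave(const std::array<int, 8> &Mask) {
  return matchesWithUndef(Mask, {0, 8, 2, 10, 4, 12, 6, 14});
}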
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index fa1d67644d..aaef4a466d 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -55,11 +55,11 @@ struct X86AddressMode {
     : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
     Base.Reg = 0;
   }
-  
-  
+
+
   void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
     assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
-    
+
     if (BaseType == X86AddressMode::RegBase)
       MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
                                              false, false, false, 0, false));
@@ -67,16 +67,16 @@ struct X86AddressMode {
       assert(BaseType == X86AddressMode::FrameIndexBase);
       MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
     }
-    
+
     MO.push_back(MachineOperand::CreateImm(Scale));
     MO.push_back(MachineOperand::CreateReg(IndexReg, false, false, false,
                                            false, false, 0, false));
-    
+
     if (GV)
       MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
     else
       MO.push_back(MachineOperand::CreateImm(Disp));
-    
+
     MO.push_back(MachineOperand::CreateReg(0, false, false, false,
                                            false, false, 0, false));
   }
@@ -122,7 +122,7 @@ static inline const MachineInstrBuilder &
 addFullAddress(const MachineInstrBuilder &MIB,
                const X86AddressMode &AM) {
   assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
-  
+
   if (AM.BaseType == X86AddressMode::RegBase)
     MIB.addReg(AM.Base.Reg);
   else {
@@ -135,7 +135,7 @@ addFullAddress(const MachineInstrBuilder &MIB,
     MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
   else
     MIB.addImm(AM.Disp);
-  
+
   return MIB.addReg(0);
 }
 
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index cbec891d7e..bebe5f033c 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -367,6 +367,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
 //
 //   SDI    - SSE2 instructions with XD prefix.
 //   SDIi8  - SSE2 instructions with ImmT == Imm8 and XD prefix.
+//   SSDI   - SSE2 instructions with XS prefix.
 //   SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
 //   PDI    - SSE2 instructions with TB and OpSize prefixes.
 //   PDIi8  - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index dabb181cce..cb926f63a4 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -55,39 +55,39 @@ ReMatPICStubLoad("remat-pic-stub-load",
 
 enum {
   // Select which memory operand is being unfolded.
-  // (stored in bits 0 - 7)
+  // (stored in bits 0 - 3)
   TB_INDEX_0    = 0,
   TB_INDEX_1    = 1,
   TB_INDEX_2    = 2,
   TB_INDEX_3    = 3,
-  TB_INDEX_MASK = 0xff,
-
-  // Minimum alignment required for load/store.
-  // Used for RegOp->MemOp conversion.
-  // (stored in bits 8 - 15)
-  TB_ALIGN_SHIFT = 8,
-  TB_ALIGN_NONE  =    0 << TB_ALIGN_SHIFT,
-  TB_ALIGN_16    =   16 << TB_ALIGN_SHIFT,
-  TB_ALIGN_32    =   32 << TB_ALIGN_SHIFT,
-  TB_ALIGN_MASK  = 0xff << TB_ALIGN_SHIFT,
+  TB_INDEX_MASK = 0xf,
 
   // Do not insert the reverse map (MemOp -> RegOp) into the table.
   // This may be needed because there is a many -> one mapping.
-  TB_NO_REVERSE   = 1 << 16,
+  TB_NO_REVERSE   = 1 << 4,
 
   // Do not insert the forward map (RegOp -> MemOp) into the table.
   // This is needed for Native Client, which prohibits branch
   // instructions from using a memory operand.
-  TB_NO_FORWARD   = 1 << 17,
+  TB_NO_FORWARD   = 1 << 5,
 
-  TB_FOLDED_LOAD  = 1 << 18,
-  TB_FOLDED_STORE = 1 << 19
+  TB_FOLDED_LOAD  = 1 << 6,
+  TB_FOLDED_STORE = 1 << 7,
+
+  // Minimum alignment required for load/store.
+  // Used for RegOp->MemOp conversion.
+  // (stored in bits 8 - 15)
+  TB_ALIGN_SHIFT = 8,
+  TB_ALIGN_NONE  =    0 << TB_ALIGN_SHIFT,
+  TB_ALIGN_16    =   16 << TB_ALIGN_SHIFT,
+  TB_ALIGN_32    =   32 << TB_ALIGN_SHIFT,
+  TB_ALIGN_MASK  = 0xff << TB_ALIGN_SHIFT
 };
 
 struct X86OpTblEntry {
   uint16_t RegOp;
   uint16_t MemOp;
-  uint32_t Flags;
+  uint16_t Flags;
 };
 
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
@@ -415,14 +415,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::IMUL64rri8,      X86::IMUL64rmi8,          0 },
     { X86::Int_COMISDrr,    X86::Int_COMISDrm,        0 },
     { X86::Int_COMISSrr,    X86::Int_COMISSrm,        0 },
-    { X86::Int_CVTDQ2PDrr,  X86::Int_CVTDQ2PDrm,      TB_ALIGN_16 },
-    { X86::Int_CVTDQ2PSrr,  X86::Int_CVTDQ2PSrm,      TB_ALIGN_16 },
-    { X86::Int_CVTPD2DQrr,  X86::Int_CVTPD2DQrm,      TB_ALIGN_16 },
-    { X86::Int_CVTPD2PSrr,  X86::Int_CVTPD2PSrm,      TB_ALIGN_16 },
-    { X86::Int_CVTPS2DQrr,  X86::Int_CVTPS2DQrm,      TB_ALIGN_16 },
-    { X86::Int_CVTPS2PDrr,  X86::Int_CVTPS2PDrm,      0 },
     { X86::CVTSD2SI64rr,    X86::CVTSD2SI64rm,        0 },
     { X86::CVTSD2SIrr,      X86::CVTSD2SIrm,          0 },
+    { X86::CVTSS2SI64rr,    X86::CVTSS2SI64rm,        0 },
+    { X86::CVTSS2SIrr,      X86::CVTSS2SIrm,          0 },
     { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm,      0 },
     { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm,    0 },
     { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm,      0 },
@@ -499,14 +495,20 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     // AVX 128-bit versions of foldable instructions
     { X86::Int_VCOMISDrr,   X86::Int_VCOMISDrm,       0 },
     { X86::Int_VCOMISSrr,   X86::Int_VCOMISSrm,       0 },
-    { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm,     TB_ALIGN_16 },
-    { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm,     TB_ALIGN_16 },
-    { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm,     TB_ALIGN_16 },
-    { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm,     TB_ALIGN_16 },
-    { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm,     TB_ALIGN_16 },
-    { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm,     0 },
     { X86::Int_VUCOMISDrr,  X86::Int_VUCOMISDrm,      0 },
     { X86::Int_VUCOMISSrr,  X86::Int_VUCOMISSrm,      0 },
+    { X86::VCVTTSD2SI64rr,  X86::VCVTTSD2SI64rm,      0 },
+    { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 },
+    { X86::VCVTTSD2SIrr,    X86::VCVTTSD2SIrm,        0 },
+    { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm,    0 },
+    { X86::VCVTTSS2SI64rr,  X86::VCVTTSS2SI64rm,      0 },
+    { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 },
+    { X86::VCVTTSS2SIrr,    X86::VCVTTSS2SIrm,        0 },
+    { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm,    0 },
+    { X86::VCVTSD2SI64rr,   X86::VCVTSD2SI64rm,       0 },
+    { X86::VCVTSD2SIrr,     X86::VCVTSD2SIrm,         0 },
+    { X86::VCVTSS2SI64rr,   X86::VCVTSS2SI64rm,       0 },
+    { X86::VCVTSS2SIrr,     X86::VCVTSS2SIrm,         0 },
     { X86::FsVMOVAPDrr,     X86::VMOVSDrm,            TB_NO_REVERSE },
     { X86::FsVMOVAPSrr,     X86::VMOVSSrm,            TB_NO_REVERSE },
     { X86::VMOV64toPQIrr,   X86::VMOVQI2PQIrm,        0 },
@@ -815,17 +817,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm,     0 },
     { X86::VCVTSS2SDrr,     X86::VCVTSS2SDrm,         0 },
     { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm,     0 },
-    { X86::VCVTTSD2SI64rr,  X86::VCVTTSD2SI64rm,      0 },
-    { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 },
-    { X86::VCVTTSD2SIrr,    X86::VCVTTSD2SIrm,        0 },
-    { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm,   0 },
-    { X86::VCVTTSS2SI64rr,  X86::VCVTTSS2SI64rm,      0 },
-    { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 },
-    { X86::VCVTTSS2SIrr,    X86::VCVTTSS2SIrm,        0 },
-    { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm,   0 },
-    { X86::VCVTSD2SI64rr,   X86::VCVTSD2SI64rm,       0 },
-    { X86::VCVTSD2SIrr,     X86::VCVTSD2SIrm,         0 },
-    { X86::VCVTTPD2DQrr,    X86::VCVTTPD2DQrm,        TB_ALIGN_16 },
+    { X86::VCVTTPD2DQrr,    X86::VCVTTPD2DQXrm,       TB_ALIGN_16 },
     { X86::VCVTTPS2DQrr,    X86::VCVTTPS2DQrm,        TB_ALIGN_16 },
     { X86::VRSQRTSSr,       X86::VRSQRTSSm,           0 },
     { X86::VSQRTSDr,        X86::VSQRTSDm,            0 },
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 5ae6b99e5a..4006dad684 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -145,9 +145,9 @@ class X86InstrInfo : public X86GenInstrInfo {
                       std::pair<unsigned, unsigned> > MemOp2RegOpTableType;
   MemOp2RegOpTableType MemOp2RegOpTable;
 
-  void AddTableEntry(RegOp2MemOpTableType &R2MTable,
-                     MemOp2RegOpTableType &M2RTable,
-                     unsigned RegOp, unsigned MemOp, unsigned Flags);
+  static void AddTableEntry(RegOp2MemOpTableType &R2MTable,
+                            MemOp2RegOpTableType &M2RTable,
+                            unsigned RegOp, unsigned MemOp, unsigned Flags);
 
 public:
   explicit X86InstrInfo(X86TargetMachine &tm);
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 892115b77e..0edd10a355 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -333,6 +333,12 @@ def f128mem : X86MemOperand<"printf128mem"> {
   let ParserMatchClass = X86Mem128AsmOperand; }
 def f256mem : X86MemOperand<"printf256mem">{
   let ParserMatchClass = X86Mem256AsmOperand; }
+def v128mem : X86MemOperand<"printf128mem"> {
+  let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
+  let ParserMatchClass = X86Mem128AsmOperand; }
+def v256mem : X86MemOperand<"printf256mem"> {
+  let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
+  let ParserMatchClass = X86Mem256AsmOperand; }
 }
 
 // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
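The X86InstrInfo.cpp hunk above repacks the fold-table flags so they fit the Flags field, which the same hunk narrows from uint32_t to uint16_t. A standalone sketch of the resulting layout, not part of the patch, with hypothetical encode/decode helpers added for illustration:

// Not part of the patch: the 16-bit flag layout the enum above defines.
// Bits 0-3 hold the unfolded-operand index, bits 4-7 hold boolean
// properties, and bits 8-15 hold the minimum alignment in bytes.
#include <cstdint>

enum : uint16_t {
  TB_INDEX_MASK   = 0xf,   // bits 0-3: which memory operand is unfolded
  TB_NO_REVERSE   = 1 << 4,
  TB_NO_FORWARD   = 1 << 5,
  TB_FOLDED_LOAD  = 1 << 6,
  TB_FOLDED_STORE = 1 << 7,
  TB_ALIGN_SHIFT  = 8      // bits 8-15: minimum alignment in bytes
};

// Hypothetical helpers (not in LLVM) showing how the fields compose.
inline uint16_t makeFlags(unsigned Index, unsigned Align, uint16_t Props) {
  return static_cast<uint16_t>((Index & TB_INDEX_MASK) | Props |
                               (Align << TB_ALIGN_SHIFT));
}
inline unsigned alignOf(uint16_t Flags) { return Flags >> TB_ALIGN_SHIFT; }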
f32mem, load, "cvtss2si{l}\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; @@ -1630,14 +1630,16 @@ defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX; + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle, SSE_CVT_PS>, TB, VEX, + Requires<[HasAVX]>; defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX; + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle, SSE_CVT_PS>, TB, VEX, + Requires<[HasAVX]>; } -let Pattern = []<dag> in { +let Pattern = []<dag>, neverHasSideEffects = 1 in { defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, "cvtss2si{l}\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_32>, XS; @@ -1646,8 +1648,8 @@ defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/, SSE_CVT_SS2SI_64>, XS, REX_W; defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, - TB; /* PD SSE3 form is avaiable */ + SSEPackedSingle, SSE_CVT_PS>, TB, + Requires<[HasSSE2]>; } let Predicates = [HasAVX] in { @@ -1788,56 +1790,6 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, Requires<[HasSSE2]>; } -// Convert doubleword to packed single/double fp -// SSE2 instructions without OpSize prefix -def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))], - IIC_SSE_CVT_PS_RR>, - TB, VEX, Requires<[HasAVX]>; -def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vcvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PS_RM>, - TB, VEX, Requires<[HasAVX]>; -def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))], - IIC_SSE_CVT_PS_RR>, - TB, Requires<[HasSSE2]>; -def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PS_RM>, - TB, Requires<[HasSSE2]>; - -// SSE2 instructions with XS prefix -def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XS, VEX, Requires<[HasAVX]>; -def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PD_RM>, - XS, VEX, Requires<[HasAVX]>; -def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XS, Requires<[HasSSE2]>; -def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PD_RM>, - XS, Requires<[HasSSE2]>; 
- - // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [], @@ -1858,51 +1810,63 @@ def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PS_RM>; -def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, - VEX; -def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq - (memop addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; -def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; -def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq - (memop addr:$src)))], - IIC_SSE_CVT_PS_RM>; - -// SSE2 packed instructions with XD prefix -def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XD, VEX, Requires<[HasAVX]>; -def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, - XD, VEX, Requires<[HasAVX]>; -def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XD, Requires<[HasSSE2]>; -def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, - XD, Requires<[HasSSE2]>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), + (VCVTPS2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), + (VCVTPS2DQrm addr:$src)>; +} + +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), + (CVTPS2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), + (CVTPS2DQrm addr:$src)>; +} + +// Convert Packed Double FP to Packed DW Integers +let Predicates = [HasAVX] in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. 
+def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", + (VCVTPD2DQrr VR128:$dst, VR128:$src)>; +def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; +def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", + (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; +} + +def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>; +def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), + (VCVTPD2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), + (VCVTPD2DQXrm addr:$src)>; +} + +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), + (CVTPD2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), + (CVTPD2DQrm addr:$src)>; +} // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix @@ -1914,7 +1878,7 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq - (memop addr:$src)))], + (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, VEX; def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", @@ -1935,14 +1899,19 @@ def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvttps2dq (memop addr:$src)))], + (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_VCVTDQ2PSrr VR128:$src)>; + (VCVTDQ2PSrr VR128:$src)>; def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (Int_VCVTDQ2PSrm addr:$src)>; + (VCVTDQ2PSrm addr:$src)>; + + def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), + (VCVTDQ2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))), + (VCVTDQ2PSrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (VCVTTPS2DQrr VR128:$src)>; @@ -1962,9 +1931,14 @@ let Predicates = [HasAVX] in { let Predicates = [HasSSE2] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_CVTDQ2PSrr VR128:$src)>; + (CVTDQ2PSrr VR128:$src)>; def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (Int_CVTDQ2PSrm addr:$src)>; + (CVTDQ2PSrm addr:$src)>; + + def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), + (CVTDQ2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))), + (CVTDQ2PSrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (CVTTPS2DQrr VR128:$src)>; @@ -1977,12 +1951,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins 
VR128:$src), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], IIC_SSE_CVT_PD_RR>, VEX; -let isCodeGenOnly = 1 in -def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], @@ -1990,31 +1959,38 @@ def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))], + (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. -def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; // XMM only -def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dqx\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; +def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", + (VCVTTPD2DQrr VR128:$dst, VR128:$src)>; def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttpd2dqx\t{$src, $dst|$dst, $src}", [], + "cvttpd2dqx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dqy\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>, VEX; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvttpd2dqy\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; +def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", + (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; + +let Predicates = [HasAVX] in { + def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), + (VCVTTPD2DQYrr VR256:$src)>; + def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), + (VCVTTPD2DQYrm addr:$src)>; +} // Predicates = [HasAVX] // Convert packed single to packed double let Predicates = [HasAVX] in { @@ -2032,35 +2008,71 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB, VEX; } + +let Predicates = [HasSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>, TB; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB; +} + +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), + (VCVTPS2PDrr VR128:$src)>; +} -def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - TB, VEX, Requires<[HasAVX]>; -def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd - 
(load addr:$src)))], - IIC_SSE_CVT_PD_RM>, - TB, VEX, Requires<[HasAVX]>; -def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - TB, Requires<[HasSSE2]>; -def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd - (load addr:$src)))], - IIC_SSE_CVT_PD_RM>, - TB, Requires<[HasSSE2]>; +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), + (CVTPS2PDrr VR128:$src)>; +} + +// Convert Packed DW Integers to Packed Double FP +let Predicates = [HasAVX] in { +def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +} + +def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>; +def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>; + +// 128 bit register conversion intrinsics +let Predicates = [HasAVX] in +def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), + (VCVTDQ2PDrr VR128:$src)>; + +let Predicates = [HasSSE2] in +def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), + (CVTDQ2PDrr VR128:$src)>; + +// AVX 256-bit register conversion intrinsics +let Predicates = [HasAVX] in { + def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), + (VCVTDQ2PDYrr VR128:$src)>; + def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))), + (VCVTDQ2PDYrm addr:$src)>; + + def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), + (VCVTPD2DQYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), + (VCVTPD2DQYrm addr:$src)>; + + def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), + (VCVTDQ2PDYrr VR128:$src)>; + def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), + (VCVTDQ2PDYrm addr:$src)>; +} // Predicates = [HasAVX] // Convert packed double to packed single // The assembler can recognize rr 256-bit instructions by seeing a ymm @@ -2069,25 +2081,24 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>, VEX; -def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; // XMM only -def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2psx\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; +def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", + (VCVTPD2PSrr VR128:$dst, VR128:$src)>; def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2psx\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2psy\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [], 
                         IIC_SSE_CVT_PD_RR>, VEX;
 def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
-                        "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
+                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
+                (VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
+
 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
                      IIC_SSE_CVT_PD_RR>;
@@ -2096,64 +2107,60 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      IIC_SSE_CVT_PD_RM>;
 
-def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
-                       IIC_SSE_CVT_PD_RR>;
-def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
-                       (ins f128mem:$src),
-                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
-                                          (memop addr:$src)))],
-                       IIC_SSE_CVT_PD_RM>;
-def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
-                       IIC_SSE_CVT_PD_RR>;
-def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
-                                          (memop addr:$src)))],
-                       IIC_SSE_CVT_PD_RM>;
+let Predicates = [HasAVX] in {
+  def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src),
+            (VCVTPD2PSrr VR128:$src)>;
+  def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)),
+            (VCVTPD2PSXrm addr:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+  def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src),
+            (CVTPD2PSrr VR128:$src)>;
+  def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)),
+            (CVTPD2PSrm addr:$src)>;
+}
 
 // AVX 256-bit register conversion intrinsics
 // FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
 // whenever possible to avoid declaring two versions of each one.
-def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
-          (VCVTDQ2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
-          (VCVTDQ2PSYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
-          (VCVTPD2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
-          (VCVTPD2PSYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
-          (VCVTPS2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
-          (VCVTPS2DQYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
-          (VCVTPS2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
-          (VCVTPS2PDYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
-          (VCVTTPD2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
-          (VCVTTPD2DQYrm addr:$src)>;
-
-// Match fround and fextend for 128/256-bit conversions
-def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
-          (VCVTPD2PSYrr VR256:$src)>;
-def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
-          (VCVTPD2PSYrm addr:$src)>;
-
-def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
-          (VCVTPS2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
-          (VCVTPS2PDYrm addr:$src)>;
+let Predicates = [HasAVX] in {
+  def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+            (VCVTDQ2PSYrr VR256:$src)>;
+  def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
+            (VCVTDQ2PSYrm addr:$src)>;
+
+  def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
+            (VCVTPD2PSYrr VR256:$src)>;
+  def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
+            (VCVTPD2PSYrm addr:$src)>;
+
+  def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
+            (VCVTPS2DQYrr VR256:$src)>;
+  def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
+            (VCVTPS2DQYrm addr:$src)>;
+
+  def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
+            (VCVTPS2PDYrr VR128:$src)>;
+  def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
+            (VCVTPS2PDYrm addr:$src)>;
+
+  def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
+            (VCVTTPD2DQYrr VR256:$src)>;
+  def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
+            (VCVTTPD2DQYrm addr:$src)>;
+
+  // Match fround and fextend for 128/256-bit conversions
+  def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
+            (VCVTPD2PSYrr VR256:$src)>;
+  def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
+            (VCVTPD2PSYrm addr:$src)>;
+
+  def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
+            (VCVTPS2PDYrr VR128:$src)>;
+  def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+            (VCVTPS2PDYrm addr:$src)>;
+}
 
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Compare Instructions
@@ -4889,80 +4896,6 @@ def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
 
 //===---------------------------------------------------------------------===//
-// SSE3 - Conversion Instructions
-//===---------------------------------------------------------------------===//
-
-// Convert Packed Double FP to Packed DW Integers
-let Predicates = [HasAVX] in {
-// The assembler can recognize rr 256-bit instructions by seeing a ymm
-// register, but the same isn't true when using memory operands instead.
-// Provide other assembly rr and rm forms to address this explicitly.
-def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
-                         "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-
-// XMM only
-def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                        "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                        "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
-
-// YMM only
-def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
-                        "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
-                        "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
-
-def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
-                      IIC_SSE_CVT_PD_RM>;
-def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
-                      IIC_SSE_CVT_PD_RR>;
-
-def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
-          (VCVTTPD2DQYrr VR256:$src)>;
-def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
-          (VCVTTPD2DQYrm addr:$src)>;
-
-// Convert Packed DW Integers to Packed Double FP
-let Predicates = [HasAVX] in {
-def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
-                        "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
-                        "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-}
-
-def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
-                      IIC_SSE_CVT_PD_RR>;
-def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
-                      IIC_SSE_CVT_PD_RM>;
-
-// AVX 256-bit register conversion intrinsics
-def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
-          (VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
-          (VCVTDQ2PDYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
-          (VCVTPD2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
-          (VCVTPD2DQYrm addr:$src)>;
-
-def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
-          (VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
-          (VCVTDQ2PDYrm addr:$src)>;
-
-//===---------------------------------------------------------------------===//
 // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
 //===---------------------------------------------------------------------===//
 multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
@@ -7339,8 +7272,8 @@ let ExeDomain = SSEPackedSingle in {
                                int_x86_avx2_vbroadcast_ss_ps_256>;
 }
 let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
-                                        int_x86_avx2_vbroadcast_sd_pd_256>;
+def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
+                                         int_x86_avx2_vbroadcast_sd_pd_256>;
 
 let Predicates = [HasAVX2] in
 def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
@@ -7751,6 +7684,31 @@ let Predicates = [HasAVX2] in {
   def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
             (VPBROADCASTQYrm addr:$src)>;
+  def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
+            (VPBROADCASTBrr VR128:$src)>;
+  def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
+            (VPBROADCASTBYrr VR128:$src)>;
+  def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
+            (VPBROADCASTWrr VR128:$src)>;
+  def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
+            (VPBROADCASTWYrr VR128:$src)>;
+  def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
+            (VPBROADCASTDrr VR128:$src)>;
+  def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
+            (VPBROADCASTDYrr VR128:$src)>;
+  def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
+            (VPBROADCASTQrr VR128:$src)>;
+  def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
+            (VPBROADCASTQYrr VR128:$src)>;
+  def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
+            (VBROADCASTSSrr VR128:$src)>;
+  def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
+            (VBROADCASTSSYrr VR128:$src)>;
+  def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
+            (VPBROADCASTQrr VR128:$src)>;
+  def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
+            (VBROADCASTSDYrr VR128:$src)>;
+
   // Provide fallback in case the load node that is used in the patterns above
   // is used by additional users, which prevents the pattern selection.
   let AddedComplexity = 20 in {
@@ -7761,7 +7719,7 @@ let Predicates = [HasAVX2] in {
               (VBROADCASTSSYrr
               (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
     def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
-              (VBROADCASTSDrr
+              (VBROADCASTSDYrr
               (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
 
     def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
@@ -7771,7 +7729,7 @@ let Predicates = [HasAVX2] in {
              (VBROADCASTSSYrr
              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
    def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
-             (VBROADCASTSDrr
+             (VBROADCASTSDYrr
              (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>;
   }
 }
@@ -8061,3 +8019,55 @@ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
 defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
 defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
 defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
+
+//===----------------------------------------------------------------------===//
+// VGATHER - GATHER Operations
+multiclass avx2_gather<bits<8> opc, string OpcodeStr,
+                       RegisterClass RC256, X86MemOperand memop256,
+                       Intrinsic IntGather128, Intrinsic IntGather256> {
+  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
+            (ins VR128:$src1, v128mem:$src2, VR128:$mask),
+            !strconcat(OpcodeStr,
+                       "\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
+            []>, VEX_4VOp3;
+  def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst),
+            (ins RC256:$src1, memop256:$src2, RC256:$mask),
+            !strconcat(OpcodeStr,
+                       "\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
+            []>, VEX_4VOp3, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+  defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
+                                VR256, v128mem,
+                                int_x86_avx2_gather_d_pd,
+                                int_x86_avx2_gather_d_pd_256>, VEX_W;
+  defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
+                                VR256, v256mem,
+                                int_x86_avx2_gather_q_pd,
+                                int_x86_avx2_gather_q_pd_256>, VEX_W;
+  defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
+                                VR256, v256mem,
+                                int_x86_avx2_gather_d_ps,
+                                int_x86_avx2_gather_d_ps_256>;
+  defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
+                                VR128, v256mem,
+                                int_x86_avx2_gather_q_ps,
+                                int_x86_avx2_gather_q_ps_256>;
+  defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
+                                VR256, v128mem,
+                                int_x86_avx2_gather_d_q,
+                                int_x86_avx2_gather_d_q_256>, VEX_W;
+  defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
+                                VR256, v256mem,
+                                int_x86_avx2_gather_q_q,
+                                int_x86_avx2_gather_q_q_256>, VEX_W;
+  defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
+                                VR256, v256mem,
+                                int_x86_avx2_gather_d_d,
+                                int_x86_avx2_gather_d_d_256>;
+  defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
+                                VR128, v256mem,
+                                int_x86_avx2_gather_q_d,
+                                int_x86_avx2_gather_q_d_256>;
+}
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
index 6a8f0c8486..6d3548f093 100644
--- a/lib/Target/X86/X86InstrVMX.td
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -17,17 +17,17 @@
 // 66 0F 38 80
 def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
-                 "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+                 "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
                  Requires<[In32BitMode]>;
 def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
-                 "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+                 "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
                  Requires<[In64BitMode]>;
 // 66 0F 38 81
 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
-                  "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+                  "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
                   Requires<[In32BitMode]>;
 def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
-                  "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+                  "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
                   Requires<[In64BitMode]>;
 // 0F 01 C1
 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1be4c3864a..ed086dd8ad 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -50,10 +50,6 @@ ForceStackAlign("force-align-stack",
                 " needed for the function."), cl::init(false), cl::Hidden);
 
-cl::opt<bool>
-EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
-          cl::desc("Enable use of a base pointer for complex stack frames"));
-
 // @LOCALMOD-BEGIN
 extern cl::opt<bool> FlagUseZeroBasedSandbox;
 extern cl::opt<bool> FlagRestrictR15;
@@ -77,12 +73,10 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
     SlotSize = 8;
     StackPtr = X86::RSP;
     FramePtr = X86::RBP;
-    BasePtr = X86::RBX;
   } else {
     SlotSize = 4;
     StackPtr = X86::ESP;
     FramePtr = X86::EBP;
-    BasePtr = X86::EBX;
   }
 }
@@ -301,20 +295,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
       Reserved.set(*I);
   }
 
-  // Set the base-pointer register and its aliases as reserved if needed.
-  if (hasBasePointer(MF)) {
-    CallingConv::ID CC = MF.getFunction()->getCallingConv();
-    const uint32_t* RegMask = getCallPreservedMask(CC);
-    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
-      report_fatal_error(
-        "Stack realignment in presence of dynamic allocas is not supported with"
-        "this calling convention.");
-
-    Reserved.set(getBaseRegister());
-    for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I)
-      Reserved.set(*I);
-  }
-
   // Mark the segment registers as reserved.
   Reserved.set(X86::CS);
   Reserved.set(X86::SS);
@@ -384,35 +364,10 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
 
-bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  if (!EnableBasePointer)
-    return false;
-
-  // When we need stack realignment and there are dynamic allocas, we can't
-  // reference off of the stack pointer, so we reserve a base pointer.
-  if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
-    return true;
-
-  return false;
-}
-
 bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
-  const MachineRegisterInfo *MRI = &MF.getRegInfo();
-  if (!MF.getTarget().Options.RealignStack)
-    return false;
-
-  // Stack realignment requires a frame pointer.  If we already started
-  // register allocation with frame pointer elimination, it is too late now.
-  if (!MRI->canReserveReg(FramePtr))
-    return false;
-
-  // If base pointer is necessary.  Check that it isn't too late to reserve it.
-  if (MFI->hasVarSizedObjects())
-    return MRI->canReserveReg(BasePtr);
-  return true;
+  return (MF.getTarget().Options.RealignStack &&
+          !MFI->hasVarSizedObjects());
 }
@@ -422,6 +377,13 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
   bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
                               F->hasFnAttr(Attribute::StackAlignment));
 
+  // FIXME: Currently we don't support stack realignment for functions with
+  // variable-sized allocas.
+  // FIXME: It's more complicated than this...
+  if (0 && requiresRealignment && MFI->hasVarSizedObjects())
+    report_fatal_error(
+      "Stack realignment in presence of dynamic allocas is not supported");
+
   // If we've requested that we force align the stack do so now.
   if (ForceStackAlign)
     return canRealignStack(MF);
@@ -561,9 +523,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   unsigned Opc = MI.getOpcode();
   bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
-  if (hasBasePointer(MF))
-    BasePtr = getBaseRegister();
-  else if (needsStackRealignment(MF))
+  if (needsStackRealignment(MF))
     BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
   else if (AfterFPPop)
     BasePtr = StackPtr;
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 1bc32cbb78..ee69842b10 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -50,11 +50,6 @@ private:
   ///
   unsigned FramePtr;
 
-  /// BasePtr - X86 physical register used as a base ptr in complex stack
-  /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
-  /// variable size stack objects.
-  unsigned BasePtr;
-
 public:
   X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
 
@@ -111,8 +106,6 @@ public:
   /// register scavenger to determine what registers are free.
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  bool hasBasePointer(const MachineFunction &MF) const;
-
   bool canRealignStack(const MachineFunction &MF) const;
 
   bool needsStackRealignment(const MachineFunction &MF) const;
 
@@ -130,7 +123,6 @@ public:
   // Debug information queries.
   unsigned getFrameRegister(const MachineFunction &MF) const;
   unsigned getStackRegister() const { return StackPtr; }
-  unsigned getBaseRegister() const { return BasePtr; }
   // FIXME: Move to FrameInfok
   unsigned getSlotSize() const { return SlotSize; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 6d05a91a32..20acc2bab3 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -150,44 +150,44 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
 bool X86PassConfig::addInstSelector() {
   // Install an instruction selector.
-  PM->add(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
+  addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
 
   // For ELF, cleanup any local-dynamic TLS accesses.
   if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
-    PM->add(createCleanupLocalDynamicTLSPass());
+    addPass(createCleanupLocalDynamicTLSPass());
 
   // For 32-bit, prepend instructions to set the "global base reg" for PIC.
   if (!getX86Subtarget().is64Bit())
-    PM->add(createGlobalBaseRegPass());
+    addPass(createGlobalBaseRegPass());
 
   return false;
 }
 
 bool X86PassConfig::addPreRegAlloc() {
-  PM->add(createX86MaxStackAlignmentHeuristicPass());
+  addPass(createX86MaxStackAlignmentHeuristicPass());
   return false;  // -print-machineinstr shouldn't print after this.
 }
 
 bool X86PassConfig::addPostRegAlloc() {
-  PM->add(createX86FloatingPointStackifierPass());
+  addPass(createX86FloatingPointStackifierPass());
   return true;  // -print-machineinstr should print after this.
 }
 
 bool X86PassConfig::addPreEmitPass() {
   bool ShouldPrint = false;
   if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) {
-    PM->add(createExecutionDependencyFixPass(&X86::VR128RegClass));
+    addPass(createExecutionDependencyFixPass(&X86::VR128RegClass));
     ShouldPrint = true;
   }
 
   if (getX86Subtarget().hasAVX() && UseVZeroUpper) {
-    PM->add(createX86IssueVZeroUpperPass());
+    addPass(createX86IssueVZeroUpperPass());
     ShouldPrint = true;
   }
 
   // @LOCALMOD-START
   if (getX86Subtarget().isTargetNaCl()) {
-    PM->add(createX86NaClRewritePass());
+    addPass(createX86NaClRewritePass());
     ShouldPrint = true;
   }
   // @LOCALMOD-END
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 32bfba96bb..4f39d68d40 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -10,17 +10,19 @@
 #include "X86TargetObjectFile.h"
 #include "X86TargetMachine.h"
 #include "X86Subtarget.h" // @LOCALMOD
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionELF.h" // @LOCALMOD
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
 using namespace llvm;
 using namespace dwarf;
 
-const MCExpr *X8664_MachoTargetObjectFile::
+const MCExpr *X86_64MachoTargetObjectFile::
 getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
                                MachineModuleInfo *MMI, unsigned Encoding,
                                MCStreamer &Streamer) const {
@@ -39,12 +41,18 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
     getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
 }
 
-MCSymbol *X8664_MachoTargetObjectFile::
+MCSymbol *X86_64MachoTargetObjectFile::
 getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
                         MachineModuleInfo *MMI) const {
   return Mang->getSymbol(GV);
 }
 
+void
+X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
+  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+  InitializeELF(TM.Options.UseInitArray);
+}
+
 // @LOCALMOD-START
 // NOTE: this was largely lifted from
 // lib/Target/ARM/ARMTargetObjectFile.cpp
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 34c1234eae..5fac48e57a 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -16,9 +16,9 @@
 namespace llvm {
 
-  /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
+  /// X86_64MachoTargetObjectFile - This TLOF implementation is used for Darwin
   /// x86-64.
-  class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+  class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
   public:
     virtual const MCExpr *
     getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
@@ -32,6 +32,12 @@ namespace llvm {
                                    MachineModuleInfo *MMI) const;
   };
 
+  /// X86LinuxTargetObjectFile - This implementation is used for linux x86
+  /// and x86-64.
+  class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+    virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+  };
+
   // @LOCALMOD-BEGIN
   class TargetLoweringObjectFileNaCl : public TargetLoweringObjectFileELF {
   public: