aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/X86
diff options
context:
space:
mode:
authorDerek Schuff <dschuff@chromium.org>2012-08-17 14:35:45 -0700
committerDerek Schuff <dschuff@chromium.org>2012-08-17 14:35:45 -0700
commitb62e9abf7dd9e39c95327914ce9dfe216386824a (patch)
treec683f0bcbef19f622727251165eaf89a4f806c62 /lib/Target/X86
parent66f65db9406ca9e59d4bfed89436f668d6a84374 (diff)
parentc723eb1aef817d47feec620933ee1ec6005cdd14 (diff)
Merge commit 'c723eb1aef817d47feec620933ee1ec6005cdd14'
This merges r159618 from upstream into master. It goes with clang rev af50aab0c317462129d73ae8000c6394c718598d Conflicts: include/llvm/CodeGen/LexicalScopes.h include/llvm/Target/TargetOptions.h lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/Target/ARM/ARMBaseInstrInfo.cpp lib/Target/ARM/ARMTargetMachine.cpp lib/Target/ARM/ARMTargetObjectFile.cpp lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp lib/Target/Mips/MipsISelDAGToDAG.cpp lib/Target/Mips/MipsInstrFPU.td lib/Target/Mips/MipsMCInstLower.cpp lib/Target/Mips/MipsTargetMachine.cpp lib/Target/TargetMachine.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86RegisterInfo.cpp lib/Target/X86/X86TargetObjectFile.cpp lib/Target/X86/X86TargetObjectFile.h tools/llc/llc.cpp (tools/llc/llc.cpp is from a merged version of r160532 because it was a bit hairy and I didn't want to redo it.)
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp75
-rw-r--r--lib/Target/X86/CMakeLists.txt2
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp35
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.h3
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h14
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp7
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp6
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp41
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp99
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp91
-rw-r--r--lib/Target/X86/X86InstrBuilder.h16
-rw-r--r--lib/Target/X86/X86InstrFormats.td1
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp70
-rw-r--r--lib/Target/X86/X86InstrInfo.h6
-rw-r--r--lib/Target/X86/X86InstrInfo.td6
-rw-r--r--lib/Target/X86/X86InstrSSE.td586
-rw-r--r--lib/Target/X86/X86InstrVMX.td8
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp60
-rw-r--r--lib/Target/X86/X86RegisterInfo.h8
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp16
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp14
-rw-r--r--lib/Target/X86/X86TargetObjectFile.h10
22 files changed, 655 insertions, 519 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 08c732c388..417842b467 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -187,7 +187,7 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
-
+
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
virtual void print(raw_ostream &OS) const {}
@@ -309,25 +309,25 @@ struct X86Operand : public MCParsedAsmOperand {
}
bool isMem() const { return Kind == Memory; }
- bool isMem8() const {
+ bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
}
- bool isMem16() const {
+ bool isMem16() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 16);
}
- bool isMem32() const {
+ bool isMem32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32);
}
- bool isMem64() const {
+ bool isMem64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64);
}
- bool isMem80() const {
+ bool isMem80() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 80);
}
- bool isMem128() const {
+ bool isMem128() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 128);
}
- bool isMem256() const {
+ bool isMem256() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 256);
}
@@ -356,26 +356,26 @@ struct X86Operand : public MCParsedAsmOperand {
addExpr(Inst, getImm());
}
- void addMem8Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem8Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem16Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem16Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem32Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem32Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem64Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem64Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem80Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem80Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem128Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem128Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem256Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem256Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
void addMemOperands(MCInst &Inst, unsigned N) const {
@@ -467,7 +467,7 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) {
bool X86AsmParser::isDstOp(X86Operand &Op) {
unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
- return Op.isMem() &&
+ return Op.isMem() &&
(Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
@@ -611,7 +611,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(Start, "Expected '[' token!");
Parser.Lex();
-
+
if (getLexer().is(AsmToken::Identifier)) {
// Parse BaseReg
if (ParseRegister(BaseReg, Start, End)) {
@@ -668,7 +668,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
End = Parser.getTok().getLoc();
if (!IndexReg)
ParseRegister(IndexReg, Start, End);
- else if (getParser().ParseExpression(Disp, End)) return 0;
+ else if (getParser().ParseExpression(Disp, End)) return 0;
}
}
@@ -916,15 +916,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// If we have both a base register and an index register make sure they are
// both 64-bit or 32-bit registers.
+ // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
if (BaseReg != 0 && IndexReg != 0) {
if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
- !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) &&
+ (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
IndexReg != X86::RIZ) {
Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
return 0;
}
if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
- !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) &&
+ (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
IndexReg != X86::EIZ){
Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
return 0;
@@ -944,7 +947,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
PatchedName != "setb" && PatchedName != "setnb")
PatchedName = PatchedName.substr(0, Name.size()-1);
-
+
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
const MCExpr *ExtraImmOp = 0;
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
@@ -1217,7 +1220,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
}
-
+
// Transforms "int $3" into "int3" as a size optimization. We can't write an
// instalias with an immediate operand yet.
if (Name == "int" && Operands.size() == 2) {
@@ -1520,7 +1523,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
case Match_Success:
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
- // individual transformations can chain off each other.
+ // individual transformations can chain off each other.
while (processInstruction(Inst, Operands))
;
@@ -1558,12 +1561,12 @@ MatchAndEmitInstruction(SMLoc IDLoc,
// Otherwise, we assume that this may be an integer instruction, which comes
// in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
-
+
// Check for the various suffix matches.
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
unsigned Match1, Match2, Match3, Match4;
-
+
Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
Tmp[Base.size()] = Suffixes[1];
Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
@@ -1691,19 +1694,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
const MCExpr *Value;
if (getParser().ParseExpression(Value))
return true;
-
+
getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
-
+
if (getLexer().is(AsmToken::EndOfStatement))
break;
-
+
// FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
return Error(L, "unexpected token in directive");
Parser.Lex();
}
}
-
+
Parser.Lex();
return false;
}
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 5b402da3ad..45fd42f205 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -53,6 +53,8 @@ endif()
add_llvm_target(X86CodeGen ${sources})
+add_dependencies(LLVMX86CodeGen intrinsics_gen)
+
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index b13a00620b..d58e36c803 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -498,7 +498,38 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
} else {
baseReg = MCOperand::CreateReg(0);
}
-
+
+ // Check whether we are handling VSIB addressing mode for GATHER.
+ // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
+ // we should use SIB_INDEX_XMM4|YMM4 for VSIB.
+ // I don't see a way to get the correct IndexReg in readSIB:
+ // We can tell whether it is VSIB or SIB after instruction ID is decoded,
+ // but instruction ID may not be decoded yet when calling readSIB.
+ uint32_t Opcode = mcInst.getOpcode();
+ bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
+ Opcode == X86::VGATHERDPDYrm ||
+ Opcode == X86::VGATHERQPDrm ||
+ Opcode == X86::VGATHERDPSrm ||
+ Opcode == X86::VGATHERQPSrm ||
+ Opcode == X86::VPGATHERDQrm ||
+ Opcode == X86::VPGATHERDQYrm ||
+ Opcode == X86::VPGATHERQQrm ||
+ Opcode == X86::VPGATHERDDrm ||
+ Opcode == X86::VPGATHERQDrm);
+ bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
+ Opcode == X86::VGATHERDPSYrm ||
+ Opcode == X86::VGATHERQPSYrm ||
+ Opcode == X86::VPGATHERQQYrm ||
+ Opcode == X86::VPGATHERDDYrm ||
+ Opcode == X86::VPGATHERQDYrm);
+ if (IndexIs128 || IndexIs256) {
+ unsigned IndexOffset = insn.sibIndex -
+ (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
+ SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
+ insn.sibIndex = (SIBIndex)(IndexBase +
+ (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
+ }
+
if (insn.sibIndex != SIB_INDEX_NONE) {
switch (insn.sibIndex) {
default:
@@ -509,6 +540,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
indexReg = MCOperand::CreateReg(X86::x); break;
EA_BASES_32BIT
EA_BASES_64BIT
+ REGS_XMM
+ REGS_YMM
#undef ENTRY
}
} else {
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index fae309b45d..e2caf6a2a8 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -310,11 +310,14 @@ typedef enum {
* SIBIndex - All possible values of the SIB index field.
* Borrows entries from ALL_EA_BASES with the special case that
* sib is synonymous with NONE.
+ * Vector SIB: index can be XMM or YMM.
*/
typedef enum {
SIB_INDEX_NONE,
#define ENTRY(x) SIB_INDEX_##x,
ALL_EA_BASES
+ REGS_XMM
+ REGS_YMM
#undef ENTRY
SIB_INDEX_max
} SIBIndex;
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index c3f46ebda0..b0e5be3162 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -483,17 +483,17 @@ namespace X86II {
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified machine instruction.
//
- static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+ inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
return TSFlags >> X86II::OpcodeShift;
}
- static inline bool hasImm(uint64_t TSFlags) {
+ inline bool hasImm(uint64_t TSFlags) {
return (TSFlags & X86II::ImmMask) != 0;
}
/// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
/// of the specified instruction.
- static inline unsigned getSizeOfImm(uint64_t TSFlags) {
+ inline unsigned getSizeOfImm(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8:
@@ -508,7 +508,7 @@ namespace X86II {
/// isImmPCRel - Return true if the immediate of the specified instruction's
/// TSFlags indicates that it is pc relative.
- static inline unsigned isImmPCRel(uint64_t TSFlags) {
+ inline unsigned isImmPCRel(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8PCRel:
@@ -531,7 +531,7 @@ namespace X86II {
/// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
/// counted as one operand.
///
- static inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
+ inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
// FIXME: Remove this form.
@@ -594,7 +594,7 @@ namespace X86II {
/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
/// higher) register? e.g. r8, xmm8, xmm13, etc.
- static inline bool isX86_64ExtendedReg(unsigned RegNo) {
+ inline bool isX86_64ExtendedReg(unsigned RegNo) {
switch (RegNo) {
default: break;
case X86::R8: case X86::R9: case X86::R10: case X86::R11:
@@ -616,7 +616,7 @@ namespace X86II {
return false;
}
- static inline bool isX86_64NonExtLowByteReg(unsigned reg) {
+ inline bool isX86_64NonExtLowByteReg(unsigned reg) {
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 61e2fdcb62..7f7873acd1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -621,7 +621,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_X = 0x0;
if (HasVEX_4VOp3)
- VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
+ // Instruction format for 4VOp3:
+ // src1(ModR/M), MemAddr, src3(VEX_4V)
+ // CurOp points to start of the MemoryOperand,
+ // it skips TIED_TO operands if exist, then increments past src1.
+ // CurOp + X86::AddrNumOperands will point to src3.
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);
break;
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index ee66e7ce1c..599c8f8c6d 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -20,10 +20,10 @@
#include "X86TargetMachine.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "llvm/CallingConv.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -436,7 +436,9 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO = MI->getOperand(OpNo);
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'a': // This is an address. Currently only 'i' and 'r' are expected.
if (MO.isImm()) {
O << MO.getImm();
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index af9efbd906..e263e44f40 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -52,7 +52,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit());
+ MMI.callsUnwindInit() || MMI.callsEHReturn());
}
static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
@@ -652,7 +652,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned StackPtr = RegInfo->getStackRegister();
- unsigned BasePtr = RegInfo->getBaseRegister();
DebugLoc DL;
// If we're forcing a stack realignment we can't rely on just the frame
@@ -916,18 +915,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
UseLEA, TII, *RegInfo);
- // If we need a base pointer, set it up here. It's whatever the value
- // of the stack pointer is at this point. Any variable size objects
- // will be allocated after this, so we can still use the base pointer
- // to reference locals.
- if (RegInfo->hasBasePointer(MF)) {
- // Update the frame pointer with the current stack pointer.
- unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
- BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
- .addReg(StackPtr)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
@@ -1184,16 +1171,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con
int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
uint64_t StackSize = MFI->getStackSize();
- if (RegInfo->hasBasePointer(MF)) {
- assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
- if (FI < 0) {
- // Skip the saved EBP.
- return Offset + RegInfo->getSlotSize();
- } else {
- assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
- return Offset + StackSize;
- }
- } else if (RegInfo->needsStackRealignment(MF)) {
+ if (RegInfo->needsStackRealignment(MF)) {
if (FI < 0) {
// Skip the saved EBP.
return Offset + RegInfo->getSlotSize();
@@ -1224,14 +1202,9 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
// We can't calculate offset from frame pointer if the stack is realigned,
- // so enforce usage of stack/base pointer. The base pointer is used when we
- // have dynamic allocas in addition to dynamic realignment.
- if (RegInfo->hasBasePointer(MF))
- FrameReg = RegInfo->getBaseRegister();
- else if (RegInfo->needsStackRealignment(MF))
- FrameReg = RegInfo->getStackRegister();
- else
- FrameReg = RegInfo->getFrameRegister(MF);
+ // so enforce usage of stack pointer.
+ FrameReg = (RegInfo->needsStackRealignment(MF)) ?
+ RegInfo->getStackRegister() : RegInfo->getFrameRegister(MF);
return getFrameIndexOffset(MF, FI);
}
@@ -1368,10 +1341,6 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
"Slot for EBP register must be last in order to be found!");
(void)FrameIdx;
}
-
- // Spill the BasePtr if it's used.
- if (RegInfo->hasBasePointer(MF))
- MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
}
static bool
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index c8ff1cf0d0..2871a790c6 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -188,6 +188,7 @@ namespace {
private:
SDNode *Select(SDNode *N);
+ SDNode *SelectGather(SDNode *N, unsigned Opc);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
@@ -2165,6 +2166,30 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
llvm_unreachable("unrecognized size for LdVT");
}
+/// SelectGather - Customized ISel for GATHER operations.
+///
+SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
+ // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
+ SDValue Chain = Node->getOperand(0);
+ SDValue VSrc = Node->getOperand(2);
+ SDValue Base = Node->getOperand(3);
+ SDValue VIdx = Node->getOperand(4);
+ SDValue VMask = Node->getOperand(5);
+ ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
+ if (!Scale)
+ return 0;
+
+ // Memory Operands: Base, Scale, Index, Disp, Segment
+ SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Segment = CurDAG->getRegister(0, MVT::i32);
+ const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
+ Disp, Segment, VMask, Chain};
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ VSrc.getValueType(), MVT::Other,
+ Ops, array_lengthof(Ops));
+ return ResNode;
+}
+
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
@@ -2180,23 +2205,81 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
default: break;
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_avx2_gather_d_pd:
+ case Intrinsic::x86_avx2_gather_d_pd_256:
+ case Intrinsic::x86_avx2_gather_q_pd:
+ case Intrinsic::x86_avx2_gather_q_pd_256:
+ case Intrinsic::x86_avx2_gather_d_ps:
+ case Intrinsic::x86_avx2_gather_d_ps_256:
+ case Intrinsic::x86_avx2_gather_q_ps:
+ case Intrinsic::x86_avx2_gather_q_ps_256:
+ case Intrinsic::x86_avx2_gather_d_q:
+ case Intrinsic::x86_avx2_gather_d_q_256:
+ case Intrinsic::x86_avx2_gather_q_q:
+ case Intrinsic::x86_avx2_gather_q_q_256:
+ case Intrinsic::x86_avx2_gather_d_d:
+ case Intrinsic::x86_avx2_gather_d_d_256:
+ case Intrinsic::x86_avx2_gather_q_d:
+ case Intrinsic::x86_avx2_gather_q_d_256: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break;
+ case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
+ case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break;
+ case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break;
+ case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break;
+ case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break;
+ case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break;
+ case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
+ case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
+ }
+ SDNode *RetVal = SelectGather(Node, Opc);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+ }
+ break;
+ }
case X86ISD::GlobalBaseReg:
return getGlobalBaseReg();
+
case X86ISD::ATOMOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMOR6432);
case X86ISD::ATOMXOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMXOR6432);
case X86ISD::ATOMADD64_DAG:
- return SelectAtomic64(Node, X86::ATOMADD6432);
case X86ISD::ATOMSUB64_DAG:
- return SelectAtomic64(Node, X86::ATOMSUB6432);
case X86ISD::ATOMNAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMNAND6432);
case X86ISD::ATOMAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMAND6432);
- case X86ISD::ATOMSWAP64_DAG:
- return SelectAtomic64(Node, X86::ATOMSWAP6432);
+ case X86ISD::ATOMSWAP64_DAG: {
+ unsigned Opc;
+ switch (Opcode) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
+ case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
+ case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
+ case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
+ case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
+ case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
+ case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
+ }
+ SDNode *RetVal = SelectAtomic64(Node, Opc);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
case ISD::ATOMIC_LOAD_ADD: {
SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index bf559c98dd..4197c35adb 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -99,6 +99,10 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
unsigned IdxVal, SelectionDAG &DAG,
DebugLoc dl) {
+ // Inserting UNDEF is Result
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return Result;
+
EVT VT = Vec.getValueType();
assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
@@ -114,9 +118,8 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
* ElemsPerChunk);
SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
- Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
- VecIdx);
- return Result;
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+ VecIdx);
}
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
@@ -136,10 +139,13 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
if (Subtarget->isTargetEnvMacho()) {
if (is64Bit)
- return new X8664_MachoTargetObjectFile();
+ return new X86_64MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
}
+ if (Subtarget->isTargetLinux())
+ return new X86LinuxTargetObjectFile();
+
// @LOCALMOD-BEGIN
if (Subtarget->isTargetNaCl())
return new TargetLoweringObjectFileNaCl();
@@ -3536,6 +3542,52 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
return true;
}
+//
+// Some special combinations that can be optimized.
+//
+static
+SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG) {
+ EVT VT = SVOp->getValueType(0);
+ DebugLoc dl = SVOp->getDebugLoc();
+
+ if (VT != MVT::v8i32 && VT != MVT::v8f32)
+ return SDValue();
+
+ ArrayRef<int> Mask = SVOp->getMask();
+
+ // These are the special masks that may be optimized.
+ static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
+ static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15};
+ bool MatchEvenMask = true;
+ bool MatchOddMask = true;
+ for (int i=0; i<8; ++i) {
+ if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
+ MatchEvenMask = false;
+ if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
+ MatchOddMask = false;
+ }
+ static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1};
+ static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1};
+
+ const int *CompactionMask;
+ if (MatchEvenMask)
+ CompactionMask = CompactionMaskEven;
+ else if (MatchOddMask)
+ CompactionMask = CompactionMaskOdd;
+ else
+ return SDValue();
+
+ SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
+
+ SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0),
+ UndefNode, CompactionMask);
+ SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1),
+ UndefNode, CompactionMask);
+ static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13};
+ return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask);
+}
+
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
@@ -5041,8 +5093,16 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
SDValue Sc = Op.getOperand(0);
if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
- Sc.getOpcode() != ISD::BUILD_VECTOR)
- return SDValue();
+ Sc.getOpcode() != ISD::BUILD_VECTOR) {
+
+ if (!Subtarget->hasAVX2())
+ return SDValue();
+
+ // Use the register form of the broadcast instruction available on AVX2.
+ if (VT.is256BitVector())
+ Sc = Extract128BitVector(Sc, 0, DAG, dl);
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
+ }
Ld = Sc.getOperand(0);
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
@@ -6022,6 +6082,11 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
/// which could not be matched by any known target speficic shuffle
static SDValue
LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+
+ SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+
EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
@@ -7504,11 +7569,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
- // If GV is an alias then use the aliasee for determining
- // thread-localness.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->resolveAliasedGlobal(false);
-
TLSModel::Model model = getTargetMachine().getTLSModel(GV);
// @LOCALMOD-START
@@ -9995,7 +10055,6 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
@@ -10015,7 +10074,6 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
- MF.getRegInfo().addLiveOut(StoreAddrReg);
return DAG.getNode(X86ISD::EH_RETURN, dl,
MVT::Other,
@@ -16240,12 +16298,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// wrong class. This can happen with constraints like {xmm0} where the
// target independent register mapper will just pick the first match it can
// find, ignoring the required type.
- if (VT == MVT::f32)
+
+ if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32RegClass;
- else if (VT == MVT::f64)
+ else if (VT == MVT::f64 || VT == MVT::i64)
Res.second = &X86::FR64RegClass;
else if (X86::VR128RegClass.hasType(VT))
Res.second = &X86::VR128RegClass;
+ else if (X86::VR256RegClass.hasType(VT))
+ Res.second = &X86::VR256RegClass;
}
return Res;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index fa1d67644d..aaef4a466d 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -55,11 +55,11 @@ struct X86AddressMode {
: BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
Base.Reg = 0;
}
-
-
+
+
void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
-
+
if (BaseType == X86AddressMode::RegBase)
MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
false, false, false, 0, false));
@@ -67,16 +67,16 @@ struct X86AddressMode {
assert(BaseType == X86AddressMode::FrameIndexBase);
MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
}
-
+
MO.push_back(MachineOperand::CreateImm(Scale));
MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
false, false, false, 0, false));
-
+
if (GV)
MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
else
MO.push_back(MachineOperand::CreateImm(Disp));
-
+
MO.push_back(MachineOperand::CreateReg(0, false, false,
false, false, false, 0, false));
}
@@ -122,7 +122,7 @@ static inline const MachineInstrBuilder &
addFullAddress(const MachineInstrBuilder &MIB,
const X86AddressMode &AM) {
assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
-
+
if (AM.BaseType == X86AddressMode::RegBase)
MIB.addReg(AM.Base.Reg);
else {
@@ -135,7 +135,7 @@ addFullAddress(const MachineInstrBuilder &MIB,
MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
else
MIB.addImm(AM.Disp);
-
+
return MIB.addReg(0);
}
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index cbec891d7e..bebe5f033c 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -367,6 +367,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
//
// SDI - SSE2 instructions with XD prefix.
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
+// SSDI - SSE2 instructions with XS prefix.
// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
// PDI - SSE2 instructions with TB and OpSize prefixes.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index dabb181cce..cb926f63a4 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -55,39 +55,39 @@ ReMatPICStubLoad("remat-pic-stub-load",
enum {
// Select which memory operand is being unfolded.
- // (stored in bits 0 - 7)
+ // (stored in bits 0 - 3)
TB_INDEX_0 = 0,
TB_INDEX_1 = 1,
TB_INDEX_2 = 2,
TB_INDEX_3 = 3,
- TB_INDEX_MASK = 0xff,
-
- // Minimum alignment required for load/store.
- // Used for RegOp->MemOp conversion.
- // (stored in bits 8 - 15)
- TB_ALIGN_SHIFT = 8,
- TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
- TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
- TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
- TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT,
+ TB_INDEX_MASK = 0xf,
// Do not insert the reverse map (MemOp -> RegOp) into the table.
// This may be needed because there is a many -> one mapping.
- TB_NO_REVERSE = 1 << 16,
+ TB_NO_REVERSE = 1 << 4,
// Do not insert the forward map (RegOp -> MemOp) into the table.
// This is needed for Native Client, which prohibits branch
// instructions from using a memory operand.
- TB_NO_FORWARD = 1 << 17,
+ TB_NO_FORWARD = 1 << 5,
- TB_FOLDED_LOAD = 1 << 18,
- TB_FOLDED_STORE = 1 << 19
+ TB_FOLDED_LOAD = 1 << 6,
+ TB_FOLDED_STORE = 1 << 7,
+
+ // Minimum alignment required for load/store.
+ // Used for RegOp->MemOp conversion.
+ // (stored in bits 8 - 15)
+ TB_ALIGN_SHIFT = 8,
+ TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
+ TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
+ TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
+ TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT
};
struct X86OpTblEntry {
uint16_t RegOp;
uint16_t MemOp;
- uint32_t Flags;
+ uint16_t Flags;
};
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
@@ -415,14 +415,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
{ X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
{ X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
- { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 },
- { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 },
- { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 },
- { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
{ X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
+ { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
+ { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
{ X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
{ X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
{ X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
@@ -499,14 +495,20 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
// AVX 128-bit versions of foldable instructions
{ X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
{ X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
- { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 },
- { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 },
- { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 },
- { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 },
- { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 },
{ X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 },
{ X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 },
+ { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
+ { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 },
+ { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
+ { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 },
+ { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
+ { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 },
+ { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
+ { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
+ { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
+ { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
{ X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
@@ -815,17 +817,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
- { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 },
- { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
- { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 },
- { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
- { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 },
- { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
- { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 },
- { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
- { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 },
{ X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 5ae6b99e5a..4006dad684 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -145,9 +145,9 @@ class X86InstrInfo : public X86GenInstrInfo {
std::pair<unsigned, unsigned> > MemOp2RegOpTableType;
MemOp2RegOpTableType MemOp2RegOpTable;
- void AddTableEntry(RegOp2MemOpTableType &R2MTable,
- MemOp2RegOpTableType &M2RTable,
- unsigned RegOp, unsigned MemOp, unsigned Flags);
+ static void AddTableEntry(RegOp2MemOpTableType &R2MTable,
+ MemOp2RegOpTableType &M2RTable,
+ unsigned RegOp, unsigned MemOp, unsigned Flags);
public:
explicit X86InstrInfo(X86TargetMachine &tm);
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 892115b77e..0edd10a355 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -333,6 +333,12 @@ def f128mem : X86MemOperand<"printf128mem"> {
let ParserMatchClass = X86Mem128AsmOperand; }
def f256mem : X86MemOperand<"printf256mem">{
let ParserMatchClass = X86Mem256AsmOperand; }
+def v128mem : X86MemOperand<"printf128mem"> {
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
+ let ParserMatchClass = X86Mem128AsmOperand; }
+def v256mem : X86MemOperand<"printf256mem"> {
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
+ let ParserMatchClass = X86Mem256AsmOperand; }
}
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 56542494b2..5319455dc5 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1418,10 +1418,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d, OpndItins itins> {
- def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (OpNode SrcRC:$src))],
itins.rr, d>;
- def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
itins.rm, d>;
}
@@ -1622,7 +1622,7 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
"cvttsd2si{q}", SSE_CVT_SD2SI>,
XD, REX_W;
-let Pattern = []<dag> in {
+let Pattern = []<dag>, neverHasSideEffects = 1 in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
"cvtss2si{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
@@ -1630,14 +1630,16 @@ defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
"cvtss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle, SSE_CVT_PS>, TB, VEX,
+ Requires<[HasAVX]>;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle, SSE_CVT_PS>, TB, VEX,
+ Requires<[HasAVX]>;
}
-let Pattern = []<dag> in {
+let Pattern = []<dag>, neverHasSideEffects = 1 in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
"cvtss2si{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_32>, XS;
@@ -1646,8 +1648,8 @@ defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>,
- TB; /* PD SSE3 form is avaiable */
+ SSEPackedSingle, SSE_CVT_PS>, TB,
+ Requires<[HasSSE2]>;
}
let Predicates = [HasAVX] in {
@@ -1788,56 +1790,6 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
Requires<[HasSSE2]>;
}
-// Convert doubleword to packed single/double fp
-// SSE2 instructions without OpSize prefix
-def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
- IIC_SSE_CVT_PS_RR>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vcvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PS_RM>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
- IIC_SSE_CVT_PS_RR>,
- TB, Requires<[HasSSE2]>;
-def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PS_RM>,
- TB, Requires<[HasSSE2]>;
-
-// SSE2 instructions with XS prefix
-def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XS, VEX, Requires<[HasAVX]>;
-def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PD_RM>,
- XS, VEX, Requires<[HasAVX]>;
-def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XS, Requires<[HasSSE2]>;
-def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PD_RM>,
- XS, Requires<[HasSSE2]>;
-
-
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", [],
@@ -1858,51 +1810,63 @@ def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PS_RM>;
-def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>,
- VEX;
-def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
-def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
-def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
-
-// SSE2 packed instructions with XD prefix
-def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XD, VEX, Requires<[HasAVX]>;
-def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- XD, VEX, Requires<[HasAVX]>;
-def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XD, Requires<[HasSSE2]>;
-def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- XD, Requires<[HasSSE2]>;
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_cvtps2dq VR128:$src),
+ (VCVTPS2DQrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)),
+ (VCVTPS2DQrm addr:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_cvtps2dq VR128:$src),
+ (CVTPS2DQrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)),
+ (CVTPS2DQrm addr:$src)>;
+}
+
+// Convert Packed Double FP to Packed DW Integers
+let Predicates = [HasAVX] in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQrr VR128:$dst, VR128:$src)>;
+def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
+}
+
+def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>;
+def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>;
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src),
+ (VCVTPD2DQrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)),
+ (VCVTPD2DQXrm addr:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src),
+ (CVTPD2DQrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)),
+ (CVTPD2DQrm addr:$src)>;
+}
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
@@ -1914,7 +1878,7 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memop addr:$src)))],
+ (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX;
def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
@@ -1935,14 +1899,19 @@ def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttps2dq (memop addr:$src)))],
+ (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (Int_VCVTDQ2PSrr VR128:$src)>;
+ (VCVTDQ2PSrr VR128:$src)>;
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
- (Int_VCVTDQ2PSrm addr:$src)>;
+ (VCVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
+ (VCVTDQ2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
+ (VCVTDQ2PSrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(VCVTTPS2DQrr VR128:$src)>;
@@ -1962,9 +1931,14 @@ let Predicates = [HasAVX] in {
let Predicates = [HasSSE2] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (Int_CVTDQ2PSrr VR128:$src)>;
+ (CVTDQ2PSrr VR128:$src)>;
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
- (Int_CVTDQ2PSrm addr:$src)>;
+ (CVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
+ (CVTDQ2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
+ (CVTDQ2PSrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(CVTTPS2DQrr VR128:$src)>;
@@ -1977,12 +1951,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst,
(int_x86_sse2_cvttpd2dq VR128:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
-let isCodeGenOnly = 1 in
-def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
@@ -1990,31 +1959,38 @@ def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))],
+ (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
-def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
-def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
+def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTTPD2DQrr VR128:$dst, VR128:$src)>;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, VEX;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
+ (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
+ (VCVTTPD2DQYrr VR256:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
+ (VCVTTPD2DQYrm addr:$src)>;
+} // Predicates = [HasAVX]
// Convert packed single to packed double
let Predicates = [HasAVX] in {
@@ -2032,35 +2008,71 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB, VEX;
}
+
+let Predicates = [HasSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, TB;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_cvtps2pd VR128:$src),
+ (VCVTPS2PDrr VR128:$src)>;
+}
-def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd
- (load addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- TB, Requires<[HasSSE2]>;
-def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd
- (load addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- TB, Requires<[HasSSE2]>;
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_cvtps2pd VR128:$src),
+ (CVTPS2PDrr VR128:$src)>;
+}
+
+// Convert Packed DW Integers to Packed Double FP
+let Predicates = [HasAVX] in {
+def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>;
+def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>;
+
+// 128 bit register conversion intrinsics
+let Predicates = [HasAVX] in
+def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src),
+ (VCVTDQ2PDrr VR128:$src)>;
+
+let Predicates = [HasSSE2] in
+def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src),
+ (CVTDQ2PDrr VR128:$src)>;
+
+// AVX 256-bit register conversion intrinsics
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
+ (VCVTDQ2PDYrr VR128:$src)>;
+ def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
+ (VCVTDQ2PDYrm addr:$src)>;
+
+ def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
+ (VCVTPD2DQYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTPD2DQYrm addr:$src)>;
+
+ def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
+ (VCVTDQ2PDYrr VR128:$src)>;
+ def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VCVTDQ2PDYrm addr:$src)>;
+} // Predicates = [HasAVX]
// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
@@ -2069,25 +2081,24 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, VEX;
-def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
-def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
+def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2PSrr VR128:$dst, VR128:$src)>;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2psx\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, VEX;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
+ (VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
+
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>;
@@ -2096,64 +2107,60 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
IIC_SSE_CVT_PD_RM>;
-def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
-def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
-def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
-def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src),
+ (VCVTPD2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)),
+ (VCVTPD2PSXrm addr:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src),
+ (CVTPD2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)),
+ (CVTPD2PSrm addr:$src)>;
+}
// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
-def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
- (VCVTDQ2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
- (VCVTDQ2PSYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
- (VCVTPD2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
- (VCVTPD2PSYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
- (VCVTPS2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
- (VCVTPS2DQYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
- (VCVTPS2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
- (VCVTPS2PDYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
- (VCVTTPD2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
- (VCVTTPD2DQYrm addr:$src)>;
-
-// Match fround and fextend for 128/256-bit conversions
-def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
- (VCVTPD2PSYrr VR256:$src)>;
-def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
- (VCVTPD2PSYrm addr:$src)>;
-
-def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
- (VCVTPS2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
- (VCVTPS2PDYrm addr:$src)>;
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+ (VCVTDQ2PSYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+ def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
+ (VCVTPD2PSYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
+ (VCVTPD2PSYrm addr:$src)>;
+
+ def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
+ (VCVTPS2DQYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTPS2DQYrm addr:$src)>;
+
+ def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
+ (VCVTPS2PDYrr VR128:$src)>;
+ def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
+ (VCVTPS2PDYrm addr:$src)>;
+
+ def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
+ (VCVTTPD2DQYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTTPD2DQYrm addr:$src)>;
+
+ // Match fround and fextend for 128/256-bit conversions
+ def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
+ (VCVTPD2PSYrr VR256:$src)>;
+ def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
+ (VCVTPD2PSYrm addr:$src)>;
+
+ def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
+ (VCVTPS2PDYrr VR128:$src)>;
+ def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+ (VCVTPS2PDYrm addr:$src)>;
+}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
@@ -4889,80 +4896,6 @@ def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
//===---------------------------------------------------------------------===//
-// SSE3 - Conversion Instructions
-//===---------------------------------------------------------------------===//
-
-// Convert Packed Double FP to Packed DW Integers
-let Predicates = [HasAVX] in {
-// The assembler can recognize rr 256-bit instructions by seeing a ymm
-// register, but the same isn't true when using memory operands instead.
-// Provide other assembly rr and rm forms to address this explicitly.
-def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-
-// XMM only
-def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
-
-// YMM only
-def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
-
-def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>;
-def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>;
-
-def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
- (VCVTTPD2DQYrr VR256:$src)>;
-def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
- (VCVTTPD2DQYrm addr:$src)>;
-
-// Convert Packed DW Integers to Packed Double FP
-let Predicates = [HasAVX] in {
-def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-}
-
-def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>;
-def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>;
-
-// AVX 256-bit register conversion intrinsics
-def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
- (VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
- (VCVTDQ2PDYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
- (VCVTPD2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
- (VCVTPD2DQYrm addr:$src)>;
-
-def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
- (VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
- (VCVTDQ2PDYrm addr:$src)>;
-
-//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
@@ -7339,8 +7272,8 @@ let ExeDomain = SSEPackedSingle in {
int_x86_avx2_vbroadcast_ss_ps_256>;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
- int_x86_avx2_vbroadcast_sd_pd_256>;
+def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
+ int_x86_avx2_vbroadcast_sd_pd_256>;
let Predicates = [HasAVX2] in
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
@@ -7751,6 +7684,31 @@ let Predicates = [HasAVX2] in {
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
(VPBROADCASTQYrm addr:$src)>;
+ def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
+ (VPBROADCASTBrr VR128:$src)>;
+ def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
+ (VPBROADCASTBYrr VR128:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
+ (VPBROADCASTWrr VR128:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
+ (VPBROADCASTWYrr VR128:$src)>;
+ def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
+ (VPBROADCASTDrr VR128:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
+ (VPBROADCASTDYrr VR128:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
+ (VPBROADCASTQrr VR128:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
+ (VPBROADCASTQYrr VR128:$src)>;
+ def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
+ (VBROADCASTSSrr VR128:$src)>;
+ def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
+ (VBROADCASTSSYrr VR128:$src)>;
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
+ (VPBROADCASTQrr VR128:$src)>;
+ def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
+ (VBROADCASTSDYrr VR128:$src)>;
+
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
@@ -7761,7 +7719,7 @@ let Predicates = [HasAVX2] in {
(VBROADCASTSSYrr
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
- (VBROADCASTSDrr
+ (VBROADCASTSDYrr
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
@@ -7771,7 +7729,7 @@ let Predicates = [HasAVX2] in {
(VBROADCASTSSYrr
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VBROADCASTSDrr
+ (VBROADCASTSDYrr
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>;
}
}
@@ -8061,3 +8019,55 @@ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
+
+//===----------------------------------------------------------------------===//
+// VGATHER - GATHER Operations
+multiclass avx2_gather<bits<8> opc, string OpcodeStr,
+ RegisterClass RC256, X86MemOperand memop256,
+ Intrinsic IntGather128, Intrinsic IntGather256> {
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, v128mem:$src2, VR128:$mask),
+ !strconcat(OpcodeStr,
+ "\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
+ []>, VEX_4VOp3;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst),
+ (ins RC256:$src1, memop256:$src2, RC256:$mask),
+ !strconcat(OpcodeStr,
+ "\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
+ []>, VEX_4VOp3, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
+ VR256, v128mem,
+ int_x86_avx2_gather_d_pd,
+ int_x86_avx2_gather_d_pd_256>, VEX_W;
+ defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
+ VR256, v256mem,
+ int_x86_avx2_gather_q_pd,
+ int_x86_avx2_gather_q_pd_256>, VEX_W;
+ defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
+ VR256, v256mem,
+ int_x86_avx2_gather_d_ps,
+ int_x86_avx2_gather_d_ps_256>;
+ defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
+ VR128, v256mem,
+ int_x86_avx2_gather_q_ps,
+ int_x86_avx2_gather_q_ps_256>;
+ defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
+ VR256, v128mem,
+ int_x86_avx2_gather_d_q,
+ int_x86_avx2_gather_d_q_256>, VEX_W;
+ defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
+ VR256, v256mem,
+ int_x86_avx2_gather_q_q,
+ int_x86_avx2_gather_q_q_256>, VEX_W;
+ defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
+ VR256, v256mem,
+ int_x86_avx2_gather_d_d,
+ int_x86_avx2_gather_d_d_256>;
+ defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
+ VR128, v256mem,
+ int_x86_avx2_gather_q_d,
+ int_x86_avx2_gather_q_d_256>;
+}
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
index 6a8f0c8486..6d3548f093 100644
--- a/lib/Target/X86/X86InstrVMX.td
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -17,17 +17,17 @@
// 66 0F 38 80
def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In32BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In64BitMode]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In32BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In64BitMode]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1be4c3864a..ed086dd8ad 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -50,10 +50,6 @@ ForceStackAlign("force-align-stack",
" needed for the function."),
cl::init(false), cl::Hidden);
-cl::opt<bool>
-EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
- cl::desc("Enable use of a base pointer for complex stack frames"));
-
// @LOCALMOD-BEGIN
extern cl::opt<bool> FlagUseZeroBasedSandbox;
extern cl::opt<bool> FlagRestrictR15;
@@ -77,12 +73,10 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
SlotSize = 8;
StackPtr = X86::RSP;
FramePtr = X86::RBP;
- BasePtr = X86::RBX;
} else {
SlotSize = 4;
StackPtr = X86::ESP;
FramePtr = X86::EBP;
- BasePtr = X86::EBX;
}
}
@@ -301,20 +295,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*I);
}
- // Set the base-pointer register and its aliases as reserved if needed.
- if (hasBasePointer(MF)) {
- CallingConv::ID CC = MF.getFunction()->getCallingConv();
- const uint32_t* RegMask = getCallPreservedMask(CC);
- if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
- report_fatal_error(
- "Stack realignment in presence of dynamic allocas is not supported with"
- "this calling convention.");
-
- Reserved.set(getBaseRegister());
- for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I)
- Reserved.set(*I);
- }
-
// Mark the segment registers as reserved.
Reserved.set(X86::CS);
Reserved.set(X86::SS);
@@ -384,35 +364,10 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- if (!EnableBasePointer)
- return false;
-
- // When we need stack realignment and there are dynamic allocas, we can't
- // reference off of the stack pointer, so we reserve a base pointer.
- if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
- return true;
-
- return false;
-}
-
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const MachineRegisterInfo *MRI = &MF.getRegInfo();
- if (!MF.getTarget().Options.RealignStack)
- return false;
-
- // Stack realignment requires a frame pointer. If we already started
- // register allocation with frame pointer elimination, it is too late now.
- if (!MRI->canReserveReg(FramePtr))
- return false;
-
- // If base pointer is necessary. Check that it isn't too late to reserve it.
- if (MFI->hasVarSizedObjects())
- return MRI->canReserveReg(BasePtr);
- return true;
+ return (MF.getTarget().Options.RealignStack &&
+ !MFI->hasVarSizedObjects());
}
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
@@ -422,6 +377,13 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttr(Attribute::StackAlignment));
+ // FIXME: Currently we don't support stack realignment for functions with
+ // variable-sized allocas.
+ // FIXME: It's more complicated than this...
+ if (0 && requiresRealignment && MFI->hasVarSizedObjects())
+ report_fatal_error(
+ "Stack realignment in presence of dynamic allocas is not supported");
+
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
return canRealignStack(MF);
@@ -561,9 +523,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Opc = MI.getOpcode();
bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
- if (hasBasePointer(MF))
- BasePtr = getBaseRegister();
- else if (needsStackRealignment(MF))
+ if (needsStackRealignment(MF))
BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
else if (AfterFPPop)
BasePtr = StackPtr;
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 1bc32cbb78..ee69842b10 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -50,11 +50,6 @@ private:
///
unsigned FramePtr;
- /// BasePtr - X86 physical register used as a base ptr in complex stack
- /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
- /// variable size stack objects.
- unsigned BasePtr;
-
public:
X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
@@ -111,8 +106,6 @@ public:
/// register scavenger to determine what registers are free.
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasBasePointer(const MachineFunction &MF) const;
-
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
@@ -130,7 +123,6 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getStackRegister() const { return StackPtr; }
- unsigned getBaseRegister() const { return BasePtr; }
// FIXME: Move to FrameInfok
unsigned getSlotSize() const { return SlotSize; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 6d05a91a32..20acc2bab3 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -150,44 +150,44 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
bool X86PassConfig::addInstSelector() {
// Install an instruction selector.
- PM->add(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
+ addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
// For ELF, cleanup any local-dynamic TLS accesses.
if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
- PM->add(createCleanupLocalDynamicTLSPass());
+ addPass(createCleanupLocalDynamicTLSPass());
// For 32-bit, prepend instructions to set the "global base reg" for PIC.
if (!getX86Subtarget().is64Bit())
- PM->add(createGlobalBaseRegPass());
+ addPass(createGlobalBaseRegPass());
return false;
}
bool X86PassConfig::addPreRegAlloc() {
- PM->add(createX86MaxStackAlignmentHeuristicPass());
+ addPass(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
bool X86PassConfig::addPostRegAlloc() {
- PM->add(createX86FloatingPointStackifierPass());
+ addPass(createX86FloatingPointStackifierPass());
return true; // -print-machineinstr should print after this.
}
bool X86PassConfig::addPreEmitPass() {
bool ShouldPrint = false;
if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) {
- PM->add(createExecutionDependencyFixPass(&X86::VR128RegClass));
+ addPass(createExecutionDependencyFixPass(&X86::VR128RegClass));
ShouldPrint = true;
}
if (getX86Subtarget().hasAVX() && UseVZeroUpper) {
- PM->add(createX86IssueVZeroUpperPass());
+ addPass(createX86IssueVZeroUpperPass());
ShouldPrint = true;
}
// @LOCALMOD-START
if (getX86Subtarget().isTargetNaCl()) {
- PM->add(createX86NaClRewritePass());
+ addPass(createX86NaClRewritePass());
ShouldPrint = true;
}
// @LOCALMOD-END
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 32bfba96bb..4f39d68d40 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -10,17 +10,19 @@
#include "X86TargetObjectFile.h"
#include "X86TargetMachine.h"
#include "X86Subtarget.h" // @LOCALMOD
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionELF.h" // @LOCALMOD
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
using namespace dwarf;
-const MCExpr *X8664_MachoTargetObjectFile::
+const MCExpr *X86_64MachoTargetObjectFile::
getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding,
MCStreamer &Streamer) const {
@@ -39,12 +41,18 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
-MCSymbol *X8664_MachoTargetObjectFile::
+MCSymbol *X86_64MachoTargetObjectFile::
getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI) const {
return Mang->getSymbol(GV);
}
+void
+X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
+
// @LOCALMOD-START
// NOTE: this was largely lifted from
// lib/Target/ARM/ARMTargetObjectFile.cpp
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 34c1234eae..5fac48e57a 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -16,9 +16,9 @@
namespace llvm {
- /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
+ /// X86_64MachoTargetObjectFile - This TLOF implementation is used for Darwin
/// x86-64.
- class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+ class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
virtual const MCExpr *
getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
@@ -32,6 +32,12 @@ namespace llvm {
MachineModuleInfo *MMI) const;
};
+ /// X86LinuxTargetObjectFile - This implementation is used for linux x86
+ /// and x86-64.
+ class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ };
+
// @LOCALMOD-BEGIN
class TargetLoweringObjectFileNaCl : public TargetLoweringObjectFileELF {
public: