diff options
author | Alexander Kornienko <alexfh@google.com> | 2013-03-26 02:28:59 +0000 |
---|---|---|
committer | Alexander Kornienko <alexfh@google.com> | 2013-03-26 02:28:59 +0000 |
commit | d934545ae6a00aa8a8179a93d11cbd93a5240849 (patch) | |
tree | ab44db08aa63a8f94a3e09d6491c4156c624af96 /lib/Target/X86 | |
parent | 868d4470cdfa9472353ea2a49a6c456ddae9c95b (diff) | |
parent | c204410d6bc435e7cb8ea768759a54135e8e92b5 (diff) |
Updating branches/google/testing to r177703testing
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@177985 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/AsmParser/X86AsmParser.cpp | 450 | ||||
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 27 | ||||
-rw-r--r-- | lib/Target/X86/X86CodeEmitter.cpp | 24 | ||||
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 437 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrArithmetic.td | 187 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrCMovSetCC.td | 9 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrCompiler.td | 20 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrExtension.td | 65 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 90 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 762 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSystem.td | 24 | ||||
-rw-r--r-- | lib/Target/X86/X86MCInstLower.cpp | 51 | ||||
-rw-r--r-- | lib/Target/X86/X86Schedule.td | 82 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetTransformInfo.cpp | 27 |
15 files changed, 1344 insertions, 914 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index ee5c2b2bfd..d5568e08d3 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -62,6 +62,9 @@ private: X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End, + SMLoc SizeDirLoc, unsigned Size); + bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, SmallString<64> &Err); @@ -170,30 +173,33 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc OffsetOfLoc; bool AddressOf; + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNo; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned SegReg; + const MCExpr *Disp; + unsigned BaseReg; + unsigned IndexReg; + unsigned Scale; + unsigned Size; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNo; - } Reg; - - struct { - const MCExpr *Val; - bool NeedAsmRewrite; - } Imm; - - struct { - unsigned SegReg; - const MCExpr *Disp; - unsigned BaseReg; - unsigned IndexReg; - unsigned Scale; - unsigned Size; - bool NeedSizeDir; - } Mem; + struct TokOp Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; }; X86Operand(KindTy K, SMLoc Start, SMLoc End) @@ -231,11 +237,6 @@ struct X86Operand : public MCParsedAsmOperand { return Imm.Val; } - bool needAsmRewrite() const { - assert(Kind == Immediate && "Invalid access!"); - return Imm.NeedAsmRewrite; - } - const MCExpr *getMemDisp() const { assert(Kind == Memory && "Invalid access!"); return Mem.Disp; @@ -332,11 +333,6 @@ struct X86Operand : public MCParsedAsmOperand { return isImmSExti64i32Value(CE->getValue()); } - unsigned getMemSize() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.Size; - } - bool isOffsetOf() const { return OffsetOfLoc.getPointer(); } @@ -345,11 +341,6 @@ struct X86Operand : public MCParsedAsmOperand { return AddressOf; } - bool needSizeDirective() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.NeedSizeDir; - } - bool isMem() const { return Kind == Memory; } bool isMem8() const { return Kind == Memory && (!Mem.Size || Mem.Size == 8); @@ -485,17 +476,15 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } - static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, - bool NeedRewrite = true){ + static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); Res->Imm.Val = Val; - Res->Imm.NeedAsmRewrite = NeedRewrite; return Res; } /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false) { + unsigned Size = 0) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -503,7 +492,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; Res->Mem.Size = Size; - Res->Mem.NeedSizeDir = NeedSizeDir; Res->AddressOf = false; return Res; } @@ -512,7 +500,7 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false) { + unsigned Size = 0) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -527,7 +515,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; Res->Mem.Size = Size; - Res->Mem.NeedSizeDir = NeedSizeDir; Res->AddressOf = false; return Res; } @@ -890,6 +877,45 @@ public: } }; +X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, + SMLoc End, SMLoc SizeDirLoc, + unsigned Size) { + bool NeedSizeDir = false; + bool IsVarDecl = false; + if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { + const MCSymbol &Sym = SymRef->getSymbol(); + // FIXME: The SemaLookup will fail if the name is anything other then an + // identifier. + // FIXME: Pass a valid SMLoc. + unsigned tLength, tSize, tType; + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, + tSize, tType, IsVarDecl); + if (!Size) { + Size = tType * 8; // Size is in terms of bits in this context. + NeedSizeDir = Size > 0; + } + } + + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!IsVarDecl) { + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); + } + + if (NeedSizeDir) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc, + /*Len*/0, Size)); + + // When parsing inline assembly we set the base register to a non-zero value + // as we don't know the actual value at this time. This is necessary to + // get the matching correct in some cases. + return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, + /*Scale*/1, Start, End, Size); +} + X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned Size) { const AsmToken &Tok = Parser.getTok(); @@ -914,7 +940,12 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, // Adjust the EndLoc due to the ']'. End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1); Parser.Lex(); - return X86Operand::CreateMem(Disp, Start, End, Size); + if (!isParsingInlineAsm()) + return X86Operand::CreateMem(Disp, Start, End, Size); + + // We want the size directive before the '['. + SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1); + return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size); } } @@ -1036,40 +1067,9 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { if (getParser().parseExpression(Disp, End)) return 0; - bool NeedSizeDir = false; - bool IsVarDecl = false; - if (isParsingInlineAsm()) { - if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. - unsigned tLength, tSize, tType; - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, - tSize, tType, IsVarDecl); - if (!Size) - Size = tType * 8; // Size is in terms of bits in this context. - NeedSizeDir = Size > 0; - } - } if (!isParsingInlineAsm()) return X86Operand::CreateMem(Disp, Start, End, Size); - else { - // If this is not a VarDecl then assume it is a FuncDecl or some other label - // reference. We need an 'r' constraint here, so we need to create register - // operand to ensure proper matching. Just pick a GPR based on the size of - // a pointer. - if (!IsVarDecl) { - unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); - } - - // When parsing inline assembly we set the base register to a non-zero value - // as we don't know the actual value at this time. This is necessary to - // get the matching correct in some cases. - return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, - /*Scale*/1, Start, End, Size, NeedSizeDir); - } + return CreateMemForInlineAsm(Disp, Start, End, Start, Size); } /// Parse the '.' operator. @@ -1197,7 +1197,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); - return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false); + return X86Operand::CreateImm(Imm, Start, End); } X86Operand *X86AsmParser::ParseIntelOperand() { @@ -1221,6 +1221,8 @@ X86Operand *X86AsmParser::ParseIntelOperand() { getLexer().is(AsmToken::Minus)) { const MCExpr *Val; if (!getParser().parseExpression(Val, End)) { + if (isParsingInlineAsm()) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); return X86Operand::CreateImm(Val, Start, End); } } @@ -1734,242 +1736,74 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, return false; } -bool X86AsmParser:: -processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { - switch (Inst.getOpcode()) { - default: return false; - case X86::AND16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, + bool isCmp) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + if (!isCmp) + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; +} - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::AX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::EAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } +bool X86AsmParser:: +processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { + switch (Inst.getOpcode()) { + default: return false; + case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8); + case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8); + case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8); + case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8); + case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8); + case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8); + case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8); + case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8); + case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8); + case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true); + case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true); + case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true); + case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8); + case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8); + case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8); + case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8); + case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8); + case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8); + case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8); + case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8); + case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8); + case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8); + case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8); + case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8); } } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 122204ae75..5fbefaec5e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // dst(ModR/M), src1(ModR/M) // dst(ModR/M), src1(ModR/M), imm8 // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; CurOp++; if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); + SrcRegNum = CurOp + 1; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + EmitRegModRMByte(MI.getOperand(CurOp), - GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); - CurOp += 2; + GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS); + CurOp = SrcRegNum + 1; break; case X86II::MRMDestMem: diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index ece38aa346..2518e02e2a 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, const MCInstrDesc *Desc) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + emitRegModRMByte(MI.getOperand(CurOp).getReg(), - getX86RegNum(MI.getOperand(CurOp+1).getReg())); - CurOp += 2; + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; break; } case X86II::MRMDestMem: { diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b5c3270065..85155f55e0 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1526,6 +1526,9 @@ bool X86FastISel::FastLowerArguments() { if (!FuncInfo.CanLowerReturn) return false; + if (Subtarget->isTargetWindows()) + return false; + const Function *F = FuncInfo.Fn; if (F->isVarArg()) return false; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 960870dc60..23cfd6d72f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1053,23 +1053,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v8i16, Custom); setOperationAction(ISD::SRA, MVT::v16i8, Custom); - if (Subtarget->hasInt256()) { - setOperationAction(ISD::SRL, MVT::v2i64, Legal); - setOperationAction(ISD::SRL, MVT::v4i32, Legal); - - setOperationAction(ISD::SHL, MVT::v2i64, Legal); - setOperationAction(ISD::SHL, MVT::v4i32, Legal); + // In the customized shift lowering, the legal cases in AVX2 will be + // recognized. + setOperationAction(ISD::SRL, MVT::v2i64, Custom); + setOperationAction(ISD::SRL, MVT::v4i32, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Legal); - } else { - setOperationAction(ISD::SRL, MVT::v2i64, Custom); - setOperationAction(ISD::SRL, MVT::v4i32, Custom); + setOperationAction(ISD::SHL, MVT::v2i64, Custom); + setOperationAction(ISD::SHL, MVT::v4i32, Custom); - setOperationAction(ISD::SHL, MVT::v2i64, Custom); - setOperationAction(ISD::SHL, MVT::v4i32, Custom); + setOperationAction(ISD::SRA, MVT::v4i32, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Custom); - } setOperationAction(ISD::SDIV, MVT::v8i16, Custom); setOperationAction(ISD::SDIV, MVT::v4i32, Custom); } @@ -1186,14 +1179,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); - setOperationAction(ISD::SRL, MVT::v4i64, Legal); - setOperationAction(ISD::SRL, MVT::v8i32, Legal); - - setOperationAction(ISD::SHL, MVT::v4i64, Legal); - setOperationAction(ISD::SHL, MVT::v8i32, Legal); - - setOperationAction(ISD::SRA, MVT::v8i32, Legal); - setOperationAction(ISD::SDIV, MVT::v8i32, Custom); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); @@ -1210,15 +1195,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v8i32, Custom); setOperationAction(ISD::MUL, MVT::v16i16, Custom); // Don't lower v32i8 because there is no 128-bit byte mul + } - setOperationAction(ISD::SRL, MVT::v4i64, Custom); - setOperationAction(ISD::SRL, MVT::v8i32, Custom); + // In the customized shift lowering, the legal cases in AVX2 will be + // recognized. + setOperationAction(ISD::SRL, MVT::v4i64, Custom); + setOperationAction(ISD::SRL, MVT::v8i32, Custom); - setOperationAction(ISD::SHL, MVT::v4i64, Custom); - setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SHL, MVT::v4i64, Custom); + setOperationAction(ISD::SHL, MVT::v8i32, Custom); - setOperationAction(ISD::SRA, MVT::v8i32, Custom); - } + setOperationAction(ISD::SRA, MVT::v8i32, Custom); // Custom lower several nodes for 256-bit types. for (int i = MVT::FIRST_VECTOR_VALUETYPE; @@ -7834,7 +7821,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { Chain.getValue(1)); } - if (Subtarget->isTargetWindows()) { + if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) { // Just use the implicit TLS architecture // Need to generate someting similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage @@ -7854,18 +7841,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = DAG.getEntryNode(); // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or - // %gs:0x58 (64-bit). + // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly + // use its literal value of 0x2C. Value *Ptr = Constant::getNullValue(Subtarget->is64Bit() ? Type::getInt8PtrTy(*DAG.getContext(), 256) : Type::getInt32PtrTy(*DAG.getContext(), 257)); - SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, - Subtarget->is64Bit() - ? DAG.getIntPtrConstant(0x58) - : DAG.getExternalSymbol("_tls_array", - getPointerTy()), + SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) : + (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) : + DAG.getExternalSymbol("_tls_array", getPointerTy())); + + SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray, MachinePointerInfo(Ptr), false, false, false, 0); @@ -11490,16 +11478,13 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { - +static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - if (!Subtarget->hasSSE2()) - return SDValue(); - // Optimize shl/srl/sra with constant shift amount. if (isSplatVector(Amt.getNode())) { SDValue SclrAmt = Amt->getOperand(0); @@ -11610,6 +11595,224 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } } + // Special case in 32-bit mode, where i64 is expanded into high and low parts. + if (!Subtarget->is64Bit() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Amt.getOpcode() == ISD::BITCAST && + Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + Amt = Amt.getOperand(0); + unsigned Ratio = Amt.getValueType().getVectorNumElements() / + VT.getVectorNumElements(); + unsigned RatioInLog2 = Log2_32_Ceil(Ratio); + uint64_t ShiftAmt = 0; + for (unsigned i = 0; i != Ratio; ++i) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i)); + if (C == 0) + return SDValue(); + // 6 == Log2(64) + ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2))); + } + // Check remaining shift amounts. + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + uint64_t ShAmt = 0; + for (unsigned j = 0; j != Ratio; ++j) { + ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Amt.getOperand(i + j)); + if (C == 0) + return SDValue(); + // 6 == Log2(64) + ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2))); + } + if (ShAmt != ShiftAmt) + return SDValue(); + } + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + return DAG.getNode(X86ISD::VSHLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + case ISD::SRL: + return DAG.getNode(X86ISD::VSRLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + case ISD::SRA: + return DAG.getNode(X86ISD::VSRAI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + } + } + + return SDValue(); +} + +static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, + const X86Subtarget* Subtarget) { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + SDValue Amt = Op.getOperand(1); + + if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) || + VT == MVT::v4i32 || VT == MVT::v8i16 || + (Subtarget->hasInt256() && + ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) || + VT == MVT::v8i32 || VT == MVT::v16i16))) { + SDValue BaseShAmt; + EVT EltVT = VT.getVectorElementType(); + + if (Amt.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned i, j; + for (i = 0; i != NumElts; ++i) { + if (Amt.getOperand(i).getOpcode() == ISD::UNDEF) + continue; + break; + } + for (j = i; j != NumElts; ++j) { + SDValue Arg = Amt.getOperand(j); + if (Arg.getOpcode() == ISD::UNDEF) continue; + if (Arg != Amt.getOperand(i)) + break; + } + if (i != NumElts && j == NumElts) + BaseShAmt = Amt.getOperand(i); + } else { + if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) + Amt = Amt.getOperand(0); + if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE && + cast<ShuffleVectorSDNode>(Amt)->isSplat()) { + SDValue InVec = Amt.getOperand(0); + if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = InVec.getValueType().getVectorNumElements(); + unsigned i = 0; + for (; i != NumElts; ++i) { + SDValue Arg = InVec.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + BaseShAmt = Arg; + break; + } + } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { + unsigned SplatIdx = + cast<ShuffleVectorSDNode>(Amt)->getSplatIndex(); + if (C->getZExtValue() == SplatIdx) + BaseShAmt = InVec.getOperand(1); + } + } + if (BaseShAmt.getNode() == 0) + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt, + DAG.getIntPtrConstant(0)); + } + } + + if (BaseShAmt.getNode()) { + if (EltVT.bitsGT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt); + else if (EltVT.bitsLT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG); + } + case ISD::SRA: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v4i32: + case MVT::v8i16: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG); + } + case ISD::SRL: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG); + } + } + } + } + + // Special case in 32-bit mode, where i64 is expanded into high and low parts. + if (!Subtarget->is64Bit() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Amt.getOpcode() == ISD::BITCAST && + Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + Amt = Amt.getOperand(0); + unsigned Ratio = Amt.getValueType().getVectorNumElements() / + VT.getVectorNumElements(); + std::vector<SDValue> Vals(Ratio); + for (unsigned i = 0; i != Ratio; ++i) + Vals[i] = Amt.getOperand(i); + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + for (unsigned j = 0; j != Ratio; ++j) + if (Vals[j] != Amt.getOperand(i + j)) + return SDValue(); + } + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1)); + case ISD::SRL: + return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1)); + case ISD::SRA: + return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1)); + } + } + + return SDValue(); +} + +SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + SDValue Amt = Op.getOperand(1); + SDValue V; + + if (!Subtarget->hasSSE2()) + return SDValue(); + + V = LowerScalarImmediateShift(Op, DAG, Subtarget); + if (V.getNode()) + return V; + + V = LowerScalarVariableShift(Op, DAG, Subtarget); + if (V.getNode()) + return V; + + // AVX2 has VPSLLV/VPSRAV/VPSRLV. + if (Subtarget->hasInt256()) { + if (Op.getOpcode() == ISD::SRL && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v4i64 || VT == MVT::v8i32)) + return Op; + if (Op.getOpcode() == ISD::SHL && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v4i64 || VT == MVT::v8i32)) + return Op; + if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32)) + return Op; + } + // Lower SHL with variable shift amount. if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT)); @@ -11826,8 +12029,23 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, // fall through case MVT::v4i32: case MVT::v8i16: { - SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, - Op.getOperand(0), ShAmt, DAG); + // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. + if (Op0.getOpcode() == ISD::BITCAST && + Op00.getOpcode() == ISD::VECTOR_SHUFFLE) + Tmp1 = LowerVectorIntExtend(Op00, DAG); + if (Tmp1.getNode()) { + SDValue Tmp1Op0 = Tmp1.getOperand(0); + assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && + "This optimization is invalid without a VZEXT."); + return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. + Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG); return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG); } } @@ -12262,7 +12480,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::UINT_TO_FP: { - if (N->getOperand(0).getValueType() != MVT::v2i32 && + assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + if (N->getOperand(0).getValueType() != MVT::v2i32 || N->getValueType(0) != MVT::v2f32) return; SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, @@ -15918,124 +16137,12 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { - EVT VT = N->getValueType(0); if (N->getOpcode() == ISD::SHL) { SDValue V = PerformSHLCombine(N, DAG); if (V.getNode()) return V; } - // On X86 with SSE2 support, we can transform this to a vector shift if - // all elements are shifted by the same amount. We can't do this in legalize - // because the a constant vector is typically transformed to a constant pool - // so we have no knowledge of the shift amount. - if (!Subtarget->hasSSE2()) - return SDValue(); - - if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && - (!Subtarget->hasInt256() || - (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16))) - return SDValue(); - - SDValue ShAmtOp = N->getOperand(1); - EVT EltVT = VT.getVectorElementType(); - DebugLoc DL = N->getDebugLoc(); - SDValue BaseShAmt = SDValue(); - if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = VT.getVectorNumElements(); - unsigned i = 0; - for (; i != NumElts; ++i) { - SDValue Arg = ShAmtOp.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - BaseShAmt = Arg; - break; - } - // Handle the case where the build_vector is all undef - // FIXME: Should DAG allow this? - if (i == NumElts) - return SDValue(); - - for (; i != NumElts; ++i) { - SDValue Arg = ShAmtOp.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - if (Arg != BaseShAmt) { - return SDValue(); - } - } - } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && - cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) { - SDValue InVec = ShAmtOp.getOperand(0); - if (InVec.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = InVec.getValueType().getVectorNumElements(); - unsigned i = 0; - for (; i != NumElts; ++i) { - SDValue Arg = InVec.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - BaseShAmt = Arg; - break; - } - } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { - unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); - if (C->getZExtValue() == SplatIdx) - BaseShAmt = InVec.getOperand(1); - } - } - if (BaseShAmt.getNode() == 0) { - // Don't create instructions with illegal types after legalize - // types has run. - if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) && - !DCI.isBeforeLegalize()) - return SDValue(); - - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); - } - } else - return SDValue(); - - // The shift amount is an i32. - if (EltVT.bitsGT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt); - else if (EltVT.bitsLT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt); - - // The shift amount is identical so we can do a vector shift. - SDValue ValOp = N->getOperand(0); - switch (N->getOpcode()) { - default: - llvm_unreachable("Unknown shift opcode!"); - case ISD::SHL: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG); - } - case ISD::SRA: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v4i32: - case MVT::v8i16: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG); - } - case ISD::SRL: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG); - } - } + return SDValue(); } // CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..)) @@ -16346,13 +16453,19 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, // Validate that the Mask operand is a vector sra node. // FIXME: what to do for bytes, since there is a psignb/pblendvb, but // there is no psrai.b - if (Mask.getOpcode() != X86ISD::VSRAI) - return SDValue(); - - // Check that the SRA is all signbits. - SDValue SraC = Mask.getOperand(1); - unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); + unsigned SraAmt = ~0; + if (Mask.getOpcode() == ISD::SRA) { + SDValue Amt = Mask.getOperand(1); + if (isSplatVector(Amt.getNode())) { + SDValue SclrAmt = Amt->getOperand(0); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) + SraAmt = C->getZExtValue(); + } + } else if (Mask.getOpcode() == X86ISD::VSRAI) { + SDValue SraC = Mask.getOperand(1); + SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); + } if ((SraAmt + 1) != EltBits) return SDValue(); diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index d86a4065a7..19bdb96f05 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// // LEA - Load Effective Address - +let SchedRW = [WriteLEA] in { let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src), @@ -36,41 +36,52 @@ let isReMaterializable = 1 in def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; - - +} // SchedRW //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions. // +// SchedModel info for instruction that loads one value and gets the second +// (and possibly third) value from a register. +// This is used for instructions that put the memory operands before other +// uses. +class SchedLoadReg<SchedWrite SW> : Sched<[SW, + // Memory operand. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Register reads (implicit or explicit). + ReadAfterLd, ReadAfterLd]>; + // Extra precision multiplication // AL is really implied by AX, but the registers in Defs must match the // SDNode results (i8, i32). +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8 - + (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", - [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16 - + [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), - "mul{l}\t$src", // EAX,EDX = EAX*GR32 + "mul{l}\t$src", [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/], - IIC_MUL32_REG>; + IIC_MUL32_REG>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), - "mul{q}\t$src", // RAX,RDX = RAX*GR64 + "mul{q}\t$src", [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/], - IIC_MUL64>; - + IIC_MUL64>, Sched<[WriteIMul]>; +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", @@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8] - + (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let mayLoad = 1, neverHasSideEffects = 1 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), "mul{w}\t$src", - [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16] - + [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), "mul{l}\t$src", - [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32] + [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64] + "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>; } let neverHasSideEffects = 1 in { +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [], - IIC_IMUL8>; // AL,AH = AL*GR8 + IIC_IMUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [], - IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16 + IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [], - IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32 + IIC_IMUL32_RR>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [], - IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64 + IIC_IMUL64_RR>, Sched<[WriteIMul]>; let mayLoad = 1 in { +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8] + "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize; - // AX,DX = AX*[mem16] + "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize, + SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32] + "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64] + "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>; } } // neverHasSideEffects @@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), let Defs = [EFLAGS] in { let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y +let isCommutable = 1, SchedRW = [WriteIMul] in { +// X = IMUL Y, Z --> X = IMUL Z, Y // Register-Register Signed Integer Multiply def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>, TB; -} +} // isCommutable, SchedRW // Register-Memory Signed Integer Multiply +let SchedRW = [WriteIMulLd, ReadAfterLd] in { def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -172,12 +194,14 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (X86smul_flag GR64:$src1, (load addr:$src2)))], IIC_IMUL64_RM>, TB; +} // SchedRW } // Constraints = "$src1 = $dst" } // Defs = [EFLAGS] // Surprisingly enough, these are not two address instructions! let Defs = [EFLAGS] in { +let SchedRW = [WriteIMul] in { // Register-Integer Signed Integer Multiply def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), @@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, i64immSExt8:$src2))], IIC_IMUL64_RRI>; - +} // SchedRW // Memory-Integer Signed Integer Multiply +let SchedRW = [WriteIMulLd] in { def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -260,6 +285,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (X86smul_flag (load addr:$src1), i64immSExt8:$src2))], IIC_IMUL64_RMI>; +} // SchedRW } // Defs = [EFLAGS] @@ -267,6 +293,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder let hasSideEffects = 1 in { // so that we don't speculatively execute +let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", [], IIC_DIV8_REG>; @@ -280,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), "div{q}\t$src", [], IIC_DIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "div{b}\t$src", [], IIC_DIV8_MEM>; + "div{b}\t$src", [], IIC_DIV8_MEM>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "div{w}\t$src", [], IIC_DIV16>, OpSize; + "div{w}\t$src", [], IIC_DIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), - "div{l}\t$src", [], IIC_DIV32>; + "div{l}\t$src", [], IIC_DIV32>, + SchedLoadReg<WriteIDivLd>; // RDX:RAX/[mem64] = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), - "div{q}\t$src", [], IIC_DIV64>; + "div{q}\t$src", [], IIC_DIV64>, + SchedLoadReg<WriteIDivLd>; } // Signed division/remainder. +let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", [], IIC_IDIV8>; @@ -311,20 +344,25 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), "idiv{q}\t$src", [], IIC_IDIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "idiv{b}\t$src", [], IIC_IDIV8>; + "idiv{b}\t$src", [], IIC_IDIV8>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; + "idiv{w}\t$src", [], IIC_IDIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), - "idiv{l}\t$src", [], IIC_IDIV32>; + "idiv{l}\t$src", [], IIC_IDIV32>, + SchedLoadReg<WriteIDivLd>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), - "idiv{q}\t$src", [], IIC_IDIV64>; + "idiv{q}\t$src", [], IIC_IDIV64>, + SchedLoadReg<WriteIDivLd>; } } // hasSideEffects = 0 @@ -335,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // unary instructions let CodeSize = 2 in { let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), "neg{b}\t$dst", [(set GR8:$dst, (ineg GR8:$src1)), @@ -351,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst", [(set GR64:$dst, (ineg GR64:$src1)), (implicit EFLAGS)], IIC_UNARY_REG>; -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +// Read-modify-write negate. +let SchedRW = [WriteALULd, WriteRMW] in { def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst", [(store (ineg (loadi8 addr:$dst)), addr:$dst), @@ -368,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", [(store (ineg (loadi64 addr:$dst)), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; +} // SchedRW } // Defs = [EFLAGS] // Note: NOT does not set EFLAGS! -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { // Match xor -1 to not. Favors these over a move imm + xor to save code size. let AddedComplexity = 15 in { def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), @@ -388,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst", [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>; } -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +let SchedRW = [WriteALULd, WriteRMW] in { def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst", [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; @@ -402,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; +} // SchedRW } // CodeSize // TODO: inc/dec is slow for P4, but fast for Pentium-M. let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "inc{b}\t$dst", @@ -454,9 +497,9 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), Requires<[In64BitMode]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -491,9 +534,9 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In64BitMode]>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "dec{b}\t$dst", @@ -514,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))], IIC_UNARY_REG>; } // CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -532,7 +575,7 @@ let CodeSize = 2 in { def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW } // Defs = [EFLAGS] @@ -646,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f = MRMDestReg> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, + Sched<[WriteALU]>; // BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has // just a regclass (no eflags) as a result. @@ -689,7 +733,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs typeinfo.RegClass:$dst), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -699,7 +744,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -710,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> : ITy<opcode, MRMSrcMem, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALULd, ReadAfterLd]>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -746,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; } @@ -783,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -821,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, list<dag> pattern> : ITy<opcode, MRMDestMem, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>; + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]>; // BinOpMR_RMW - Instructions like "add [mem], reg". class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -849,7 +899,8 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern, bits<8> opcode = 0x80> : ITy<opcode, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]> { let ImmT = typeinfo.ImmEncoding; } @@ -881,7 +932,8 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern> : ITy<0x82, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -913,7 +965,7 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Register areg, string operands> : ITy<opcode, RawFrm, typeinfo, (outs), (ins typeinfo.ImmOperand:$src), - mnemonic, operands, []> { + mnemonic, operands, []>, Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; let Defs = [areg]; @@ -1199,7 +1251,7 @@ let isCompare = 1, Defs = [EFLAGS] in { // register class is constrained to GR8_NOREX. let isPseudo = 1 in def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), - "", [], IIC_BIN_NONMEM>; + "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; } //===----------------------------------------------------------------------===// @@ -1210,11 +1262,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))], - IIC_BIN_NONMEM>; + IIC_BIN_NONMEM>, Sched<[WriteALU]>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, - (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>; + (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>, + Sched<[WriteALULd, ReadAfterLd]>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { @@ -1261,6 +1314,7 @@ let Predicates = [HasBMI2] in { // ADCX Instruction // let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + let SchedRW = [WriteALU] in { def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adcx{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8, OpSize; @@ -1268,8 +1322,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adcx{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + } // SchedRW - let mayLoad = 1 in { + let mayLoad = 1, SchedRW = [WriteALULd] in { def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adcx{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8, OpSize; @@ -1284,6 +1339,7 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { // ADOX Instruction // let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + let SchedRW = [WriteALU] in { def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; @@ -1291,8 +1347,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>; + } // SchedRW - let mayLoad = 1 in { + let mayLoad = 1, SchedRW = [WriteALULd] in { def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS; diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 8f2d0a1aae..a967a4da5c 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -16,7 +16,7 @@ // SetCC instructions. multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", - isCommutable = 1 in { + isCommutable = 1, SchedRW = [WriteALU] in { def NAME#16rr : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), @@ -37,7 +37,8 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { IIC_CMOV32_RR>, TB; } - let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in { + let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + SchedRW = [WriteALULd, ReadAfterLd] in { def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), @@ -83,11 +84,11 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> { def r : I<opc, MRM0r, (outs GR8:$dst), (ins), !strconcat(Mnemonic, "\t$dst"), [(set GR8:$dst, (X86setcc OpNode, EFLAGS))], - IIC_SET_R>, TB; + IIC_SET_R>, TB, Sched<[WriteALU]>; def m : I<opc, MRM0m, (outs), (ins i8mem:$dst), !strconcat(Mnemonic, "\t$dst"), [(store (X86setcc OpNode, EFLAGS), addr:$dst)], - IIC_SET_M>, TB; + IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>; } // Uses = [EFLAGS] } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 734e5982b2..2b27bc5bc5 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -153,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR32:$addr)], IIC_RET>; + [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -161,7 +161,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR64:$addr)], IIC_RET>; + [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -220,7 +220,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in { def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", - [(set GR8:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller // encoding and avoids a partial-register update sometimes, but doing so @@ -229,11 +229,12 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", // to an MCInst. def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize; + [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize, + Sched<[WriteZero]>; // FIXME: Set encoding to pseudo. def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; } // We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a @@ -245,7 +246,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", let Defs = [EFLAGS], isCodeGenOnly=1, AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however @@ -254,10 +255,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), "", [(set GR64:$dst, i64immZExt32:$src)], - IIC_ALU_NONMEM>; + IIC_ALU_NONMEM>, Sched<[WriteALU]>; // Use sbb to materialize carry bit. -let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in { +let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in { // FIXME: These are pseudo ops that should be replaced with Pat<> patterns. // However, Pat<> can't replicate the destination reg into the inputs of the // result. @@ -990,9 +991,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), // This corresponds to add $foo@tpoff, %rax def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; -// This corresponds to mov foo@tpoff(%rbx), %eax -def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), - (MOV64rm tglobaltlsaddr :$dst)>; // Direct PC relative function call for small code model. 32-bit displacement diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 2eb454ded2..5ef0c3c13b 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -42,48 +42,54 @@ let neverHasSideEffects = 1 in { let neverHasSideEffects = 1 in { def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALU]>; let mayLoad = 1 in def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALULd]>; } // neverHasSideEffects = 1 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB, + Sched<[WriteALULd]>; def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; let neverHasSideEffects = 1 in { def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALULd]>; } // neverHasSideEffects = 1 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB, + Sched<[WriteALULd]>; def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class @@ -92,12 +98,12 @@ let neverHasSideEffects = 1, isCodeGenOnly = 1 in { def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [], IIC_MOVZX>, TB; + [], IIC_MOVZX>, TB, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [], IIC_MOVZX>, TB; + [], IIC_MOVZX>, TB, Sched<[WriteALULd]>; } // MOVSX64rr8 always has a REX prefix and it has an 8-bit register @@ -106,24 +112,28 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, // were generalized, this would require a special register class. def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; + [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; + [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>; + [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>, + Sched<[WriteALU]>; def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>; + [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>, + Sched<[WriteALULd]>; // movzbq and movzwq encodings for the disassembler def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), @@ -145,17 +155,19 @@ let isCodeGenOnly = 1 in { // Use movzbl instead of movzbq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; + "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // Use movzwl instead of movzwq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; + "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "", [(set GR64:$dst, (zextloadi64i16 addr:$src))], - IIC_MOVZX>, TB; + IIC_MOVZX>, TB, Sched<[WriteALULd]>; // There's no movzlq instruction, but movl can be used for this purpose, using // implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero @@ -165,9 +177,10 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), // necessarily all zero. In such cases, we fall back to these explicit zext // instructions. def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), - "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>; + "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>, + Sched<[WriteALU]>; def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "", [(set GR64:$dst, (zextloadi64i32 addr:$src))], - IIC_MOVZX>; + IIC_MOVZX>, Sched<[WriteALULd]>; } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index d989ec7bb0..39165e24a8 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -786,7 +786,7 @@ def LEAVE64 : I<0xC9, RawFrm, // let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [], IIC_POP_REG16>, OpSize; def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], @@ -803,9 +803,9 @@ def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [], def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize; def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>, Requires<[In32BitMode]>; -} +} // mayLoad, SchedRW -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[], IIC_PUSH_REG>, OpSize; def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[], @@ -832,29 +832,30 @@ def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>, def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>, Requires<[In32BitMode]>; -} +} // mayStore, SchedRW } let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [], IIC_POP_MEM>; -} -let mayStore = 1 in { +} // mayLoad, SchedRW +let mayStore = 1, SchedRW = [WriteStore] in { def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>; def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>; def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], IIC_PUSH_MEM>; -} +} // mayStore, SchedRW } -let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { +let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1, + SchedRW = [WriteStore] in { def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), @@ -871,17 +872,18 @@ def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, Requires<[In64BitMode]>; let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], - mayLoad=1, neverHasSideEffects=1 in { + mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>, Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], - mayStore=1, neverHasSideEffects=1 in { + mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>, Requires<[In32BitMode]>; } -let Constraints = "$src = $dst" in { // GR32 = bswap GR32 +let Constraints = "$src = $dst", SchedRW = [WriteALU] in { +// GR32 = bswap GR32 def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "bswap{l}\t$dst", @@ -890,7 +892,7 @@ def BSWAP32r : I<0xC8, AddRegFrm, def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), "bswap{q}\t$dst", [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW // Bit scan instructions. let Defs = [EFLAGS] in { @@ -976,7 +978,7 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>; //===----------------------------------------------------------------------===// // Move Instructions. // - +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in { def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; @@ -987,6 +989,7 @@ def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } + let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", @@ -1004,7 +1007,9 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>; } +} // SchedRW +let SchedRW = [WriteStore] in { def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>; @@ -1017,9 +1022,11 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>; +} // SchedRW /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. +let SchedRW = [WriteALU] in { def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; @@ -1038,6 +1045,7 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; +} // FIXME: These definitions are utterly broken // Just leave them commented out for now because they're useless outside @@ -1055,7 +1063,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), */ -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), @@ -1066,7 +1074,7 @@ def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } -let canFoldAsLoad = 1, isReMaterializable = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>; @@ -1081,6 +1089,7 @@ def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), [(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>; } +let SchedRW = [WriteStore] in { def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store GR8:$src, addr:$dst)], IIC_MOV_MEM>; @@ -1093,6 +1102,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store GR64:$src, addr:$dst)], IIC_MOV_MEM>; +} // SchedRW // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so // that they can be used for copying and storing h registers, which can't be @@ -1101,34 +1111,37 @@ let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV8rr_NOREX : I<0x88, MRMDestReg, (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src), - "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>, + Sched<[WriteMove]>; let mayStore = 1 in def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], - IIC_MOV_MEM>; + IIC_MOV_MEM>, Sched<[WriteStore]>; let mayLoad = 1, neverHasSideEffects = 1, canFoldAsLoad = 1, isReMaterializable = 1 in def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], - IIC_MOV_MEM>; + IIC_MOV_MEM>, Sched<[WriteLoad]>; } // Condition code ops, incl. set if equal/not equal/... +let SchedRW = [WriteALU] in { let Defs = [EFLAGS], Uses = [AH] in def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", [(set EFLAGS, (X86sahf AH))], IIC_AHF>; let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [], IIC_AHF>; // AH = flags - +} // SchedRW //===----------------------------------------------------------------------===// // Bit tests instructions: BT, BTS, BTR, BTC. let Defs = [EFLAGS] in { +let SchedRW = [WriteALU] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>, @@ -1139,13 +1152,14 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB; +} // SchedRW // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's // perspective, this is pretty bizarre. Make these instructions disassembly // only for now. -let mayLoad = 1, hasSideEffects = 0 in { +let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in { def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi16 addr:$src1), GR16:$src2), @@ -1166,6 +1180,7 @@ let mayLoad = 1, hasSideEffects = 0 in { >, TB; } +let SchedRW = [WriteALU] in { def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))], @@ -1178,10 +1193,12 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))], IIC_BT_RI>, TB; +} // SchedRW // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. +let SchedRW = [WriteALU] in { def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2)) @@ -1194,8 +1211,10 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi64 addr:$src1), i64immSExt8:$src2))], IIC_BT_MI>, TB; +} // SchedRW let hasSideEffects = 0 in { +let SchedRW = [WriteALU] in { def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1203,8 +1222,9 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd] in { def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1214,6 +1234,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1221,8 +1242,9 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1232,6 +1254,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } +let SchedRW = [WriteALU] in { def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1239,6 +1262,7 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +} // SchedRW let mayLoad = 1, mayStore = 1 in { def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), @@ -1250,6 +1274,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1257,8 +1282,9 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1268,6 +1294,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } +let SchedRW = [WriteALU] in { def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1275,8 +1302,9 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1286,6 +1314,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1293,8 +1322,9 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1483,6 +1513,7 @@ def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; // Table lookup instructions def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>; +let SchedRW = [WriteMicrocoded] in { // ASCII Adjust After Addition // sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>, @@ -1512,7 +1543,9 @@ def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>, // sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>, Requires<[In32BitMode]>; +} // SchedRW +let SchedRW = [WriteSystem] in { // Check Array Index Against Bounds def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize, @@ -1528,11 +1561,13 @@ def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>, Requires<[In32BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // MOVBE Instructions // let Predicates = [HasMOVBE] in { + let SchedRW = [WriteALULd] in { def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>, @@ -1545,6 +1580,8 @@ let Predicates = [HasMOVBE] in { "movbe{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>, T8; + } + let SchedRW = [WriteStore] in { def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>, @@ -1557,6 +1594,7 @@ let Predicates = [HasMOVBE] in { "movbe{q}\t{$src, $dst|$dst, $src}", [(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>, T8; + } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0979752757..4d43ee1f15 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -16,6 +16,8 @@ class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> { InstrItinClass rr = arg_rr; InstrItinClass rm = arg_rm; + // InstrSchedModel info. + X86FoldableSchedWrite Sched = WriteFAdd; } class SizeItins<OpndItins arg_s, OpndItins arg_d> { @@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm, // scalar +let Sched = WriteFAdd in { def SSE_ALU_F32S : OpndItins< IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM >; @@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins< def SSE_ALU_F64S : OpndItins< IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM >; +} def SSE_ALU_ITINS_S : SizeItins< SSE_ALU_F32S, SSE_ALU_F64S >; +let Sched = WriteFMul in { def SSE_MUL_F32S : OpndItins< IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM >; @@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins< def SSE_MUL_F64S : OpndItins< IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM >; +} def SSE_MUL_ITINS_S : SizeItins< SSE_MUL_F32S, SSE_MUL_F64S >; +let Sched = WriteFDiv in { def SSE_DIV_F32S : OpndItins< IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM >; @@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins< def SSE_DIV_F64S : OpndItins< IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM >; +} def SSE_DIV_ITINS_S : SizeItins< SSE_DIV_F32S, SSE_DIV_F64S >; // parallel +let Sched = WriteFAdd in { def SSE_ALU_F32P : OpndItins< IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM >; @@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins< def SSE_ALU_F64P : OpndItins< IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM >; +} def SSE_ALU_ITINS_P : SizeItins< SSE_ALU_F32P, SSE_ALU_F64P >; +let Sched = WriteFMul in { def SSE_MUL_F32P : OpndItins< IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM >; @@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins< def SSE_MUL_F64P : OpndItins< IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM >; +} def SSE_MUL_ITINS_P : SizeItins< SSE_MUL_F32P, SSE_MUL_F64P >; +let Sched = WriteFDiv in { def SSE_DIV_F32P : OpndItins< IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM >; @@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins< def SSE_DIV_F64P : OpndItins< IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM >; +} def SSE_DIV_ITINS_P : SizeItins< SSE_DIV_F32P, SSE_DIV_F64P @@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins< IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM >; +let Sched = WriteVecALU in { def SSE_INTALU_ITINS_P : OpndItins< IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM >; @@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins< def SSE_INTALUQ_ITINS_P : OpndItins< IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM >; +} +let Sched = WriteVecIMul in def SSE_INTMUL_ITINS_P : OpndItins< IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM >; @@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; } def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>; + [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class @@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>( !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))], itins.rr>; + RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, mem_cpat:$src2))], itins.rm>; + RC:$src1, mem_cpat:$src2))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed - SSE 1 & 2 packed instructions class @@ -189,14 +210,16 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], - itins.rm, d>; + itins.rm, d>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class @@ -209,12 +232,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rr, IIC_DEFAULT, d>; + pat_rr, IIC_DEFAULT, d>, + Sched<[WriteVecLogic]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rm, IIC_DEFAULT, d>; + pat_rm, IIC_DEFAULT, d>, + Sched<[WriteVecLogicLd, ReadAfterLd]>; } //===----------------------------------------------------------------------===// @@ -345,7 +370,7 @@ let Predicates = [HasAVX] in { // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>; def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", @@ -362,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4f32 immAllZerosV))]>; } @@ -379,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>; // at the rename stage without using any execution unit, so SET0PSY // and SET0PDY can be used for vector int instructions without penalty let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, Predicates = [HasAVX] in { + isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in { def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8f32 immAllZerosV))]>; } @@ -417,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX2] in @@ -444,14 +469,14 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, !strconcat(base_opc, asm_opr), [(set VR128:$dst, (vt (OpNode VR128:$src1, (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>; + IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; // For the disassembler let isCodeGenOnly = 1, hasSideEffects = 0 in def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), !strconcat(base_opc, asm_opr), - [], IIC_SSE_MOV_S_RR>; + [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; } multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, @@ -464,7 +489,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - VEX, VEX_LIG; + VEX, VEX_LIG, Sched<[WriteStore]>; // SSE1 & 2 let Constraints = "$src1 = $dst" in { defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr, @@ -473,7 +498,8 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + Sched<[WriteStore]>; } // Loading from memory automatically zeroing upper bits. @@ -482,11 +508,11 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>, VEX, VEX_LIG; + IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>; def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>; + IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>; } defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS; @@ -745,11 +771,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>; + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>, + Sched<[WriteMove]>; let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>; + [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>, + Sched<[WriteLoad]>; } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, @@ -790,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, TB, OpSize; +let SchedRW = [WriteStore] in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -822,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>, VEX, VEX_L; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], @@ -880,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), (VMOVUPDYmr addr:$dst, VR256:$src)>; +let SchedRW = [WriteStore] in { def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -896,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -1009,7 +1041,7 @@ let Predicates = [HasAVX] in { (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))), addr:$dst), - (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))), addr:$dst), (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; @@ -1044,7 +1076,7 @@ let Predicates = [UseSSE1] in { // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -1061,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let canFoldAsLoad = 1, isReMaterializable = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { let isCodeGenOnly = 1 in { def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", @@ -1095,14 +1127,16 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode, [(set VR128:$dst, (psnode VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], - itin, SSEPackedSingle>, TB; + itin, SSEPackedSingle>, TB, + Sched<[WriteShuffleLd, ReadAfterLd]>; def PDrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "d", asm_opr), [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], - itin, SSEPackedDouble>, TB, OpSize; + itin, SSEPackedDouble>, TB, OpSize, + Sched<[WriteShuffleLd, ReadAfterLd]>; } @@ -1123,6 +1157,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1143,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // Shuffle with VMOVLPS @@ -1222,6 +1258,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1246,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // VMOVHPS patterns @@ -1296,14 +1334,14 @@ let AddedComplexity = 20 in { [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), @@ -1311,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; } let Predicates = [HasAVX] in { @@ -1352,22 +1390,27 @@ def SSE_CVT_PD : OpndItins< IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_PS : OpndItins< IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_Scalar : OpndItins< IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_32 : OpndItins< IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_64 : OpndItins< IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SD2SI : OpndItins< IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM >; @@ -1377,10 +1420,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, [(set DstRC:$dst, (OpNode SrcRC:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1388,10 +1431,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, OpndItins itins> { let neverHasSideEffects = 1 in { def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [], itins.rr, d>; + [], itins.rr, d>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [], itins.rm, d>; + [], itins.rm, d>, Sched<[itins.Sched.Folded]>; } } @@ -1399,11 +1442,13 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { let neverHasSideEffects = 1 in { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2F]>; let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2FLd, ReadAfterLd]>; } // neverHasSideEffects = 1 } @@ -1534,10 +1579,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>; + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>, + Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -1549,14 +1596,14 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, @@ -1701,13 +1748,15 @@ let neverHasSideEffects = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG; + IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; + XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, @@ -1716,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround FR64:$src))], - IIC_SSE_CVT_Scalar_RR>; + IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>; def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround (loadf64 addr:$src)))], IIC_SSE_CVT_Scalar_RM>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, @@ -1743,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert scalar single to scalar double @@ -1759,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, - XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG; + XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; + XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f64 (fextend FR32:$src)), @@ -1784,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))], IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[UseSSE2]>; + Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (extloadf32 addr:$src))], IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; // extload f32 -> f64. This matches load+fextend because we have a hack in // the isel (PreprocessForFPConvert) that can introduce loads after dag @@ -1806,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; // Convert Packed Double FP to Packed DW Integers @@ -1867,7 +1926,7 @@ let Predicates = [HasAVX] in { def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, - VEX; + VEX, Sched<[WriteCvtF2I]>; // XMM only def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", @@ -1875,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "vcvtpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX; + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX, + Sched<[WriteCvtF2ILd]>; // YMM only def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L; + (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L, + Sched<[WriteCvtF2I]>; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>, - VEX, VEX_L; + VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; } @@ -1895,11 +1956,11 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix @@ -1907,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, + Sched<[WriteCvtF2ILd]>; def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), @@ -1982,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. @@ -1995,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; @@ -2021,12 +2083,13 @@ let Predicates = [HasAVX] in { def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, + Sched<[WriteCvtF2ILd]>; // Convert packed single to packed double let Predicates = [HasAVX] in { @@ -2034,32 +2097,32 @@ let Predicates = [HasAVX] in { def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX; + IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX; + IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; } let Predicates = [UseSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB; + IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB; + IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>; } // Convert Packed DW Integers to Packed Double FP @@ -2067,30 +2130,33 @@ let Predicates = [HasAVX] in { let neverHasSideEffects = 1, mayLoad = 1 in def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", - []>, VEX; + []>, VEX, Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX; + (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX, + Sched<[WriteCvtI2F]>; def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtdq2_pd_256 - (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L; + (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L, + Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L; + (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L, + Sched<[WriteCvtI2F]>; } let neverHasSideEffects = 1, mayLoad = 1 in def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>; def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>; // AVX 256-bit register conversion intrinsics let Predicates = [HasAVX] in { @@ -2107,7 +2173,7 @@ let Predicates = [HasAVX] in { def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>; // XMM only def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", @@ -2116,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2psx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>; // AVX 256-bit register conversion intrinsics @@ -2193,22 +2259,24 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, def rr : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), (ld_frag addr:$src2), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RR>; + IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RM>; + IIC_SSE_ALU_F32S_RM>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -2241,12 +2309,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC, (ins VR128:$src1, VR128:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, VR128:$src, imm:$cc))], - itins.rr>; + itins.rr>, + Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, (load addr:$src), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Aliases to match intrinsics which expect XMM operand(s). @@ -2276,12 +2346,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], - IIC_SSE_COMIS_RR, d>; + IIC_SSE_COMIS_RR, d>, + Sched<[WriteFAdd]>; def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), (ld_frag addr:$src2)))], - IIC_SSE_COMIS_RM, d>; + IIC_SSE_COMIS_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } let Defs = [EFLAGS] in { @@ -2338,20 +2410,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], - IIC_SSE_CMPP_RR, d>; + IIC_SSE_CMPP_RR, d>, + Sched<[WriteFAdd]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], - IIC_SSE_CMPP_RM, d>; + IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rri_alt : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RR, d>; + asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>; def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RM, d>; + asm_alt, [], IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } } @@ -2427,12 +2502,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffle]>; } defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, @@ -2516,13 +2593,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, (mem_frag addr:$src2))))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2613,10 +2691,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, Domain d> { def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>; + [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>, + Sched<[WriteVecLogic]>; def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], - IIC_SSE_MOVMSK, d>, REX_W; + IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>; } let Predicates = [HasAVX] in { @@ -2644,18 +2723,18 @@ let Predicates = [HasAVX] in { // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX; + SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX; + OpSize, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX, VEX_L; + SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>; def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX, VEX_L; + OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>; } defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", @@ -2693,7 +2772,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, @@ -2701,7 +2781,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -2967,6 +3048,7 @@ let isCodeGenOnly = 1 in { /// /// And, we have a special variant form for a full-vector intrinsic form. +let Sched = WriteFSqrt in { def SSE_SQRTP : OpndItins< IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM >; @@ -2974,7 +3056,9 @@ def SSE_SQRTP : OpndItins< def SSE_SQRTS : OpndItins< IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM >; +} +let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM >; @@ -2982,6 +3066,7 @@ def SSE_RCPP : OpndItins< def SSE_RCPS : OpndItins< IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM >; +} /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, @@ -2991,24 +3076,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3016,13 +3103,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>; + [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. @@ -3033,24 +3122,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3058,17 +3149,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; let Constraints = "$src1 = $dst" in { def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rr>; + [], itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1, hasSideEffects = 0 in def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rm>; + [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -3080,30 +3171,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. @@ -3115,33 +3208,33 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int VR256:$src))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_s - SSE2 unops in scalar form. @@ -3152,35 +3245,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR64:$src1, FR64:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>; + [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>, + Sched<[itins.Sched]>; // See the comments in sse1_fp_unop_s for why this is OptForSize. def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>; def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>; + [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -3191,30 +3289,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } // Square root. @@ -3305,52 +3405,48 @@ let Predicates = [UseSSE1] in { //===----------------------------------------------------------------------===// let AddedComplexity = 400 in { // Prefer non-temporal versions - def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2i64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; - - def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - let ExeDomain = SSEPackedInt in - def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4i64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; -} +let SchedRW = [WriteStore] in { +def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; +def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +let ExeDomain = SSEPackedInt in +def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2i64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +let ExeDomain = SSEPackedInt in +def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4i64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; -let AddedComplexity = 400 in { // Prefer non-temporal versions def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)], @@ -3366,9 +3462,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], IIC_SSE_MOVNT>; -def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; - // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", @@ -3380,14 +3473,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), [(nontemporalstore (i64 GR64:$src), addr:$dst)], IIC_SSE_MOVNT>, TB, Requires<[HasSSE2]>; -} +} // SchedRW = [WriteStore] + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; +} // AddedComplexity //===----------------------------------------------------------------------===// // SSE 1 & 2 - Prefetch and memory fence //===----------------------------------------------------------------------===// // Prefetch intrinsic. -let Predicates = [HasSSE1] in { +let Predicates = [HasSSE1], SchedRW = [WriteLoad] in { def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))], IIC_SSE_PREFETCH>, TB; @@ -3402,6 +3502,8 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src), IIC_SSE_PREFETCH>, TB; } +// FIXME: How should these memory instructions be modeled? +let SchedRW = [WriteLoad] in { // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)], @@ -3421,6 +3523,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>, TB, Requires<[HasSSE2]>; +} // SchedRW def : Pat<(X86SFence), (SFENCE)>; def : Pat<(X86LFence), (LFENCE)>; @@ -3450,7 +3553,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), let ExeDomain = SSEPackedInt in { // SSE integer instructions -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -3466,7 +3569,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), } // For Disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, @@ -3484,7 +3587,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, VEX; @@ -3501,7 +3604,7 @@ let Predicates = [HasAVX] in { } } -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, @@ -3520,6 +3623,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), } } +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -3538,9 +3642,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; } +} // SchedRW let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/], @@ -3552,7 +3657,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), XS, Requires<[UseSSE2]>; } -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/], @@ -3580,6 +3685,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), // SSE2 - Packed Integer Arithmetic Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecIMul in def SSE_PMADD : OpndItins< IIC_SSE_PMADD, IIC_SSE_PMADD >; @@ -3598,14 +3704,15 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, @@ -3639,20 +3746,22 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))], - itins.rr>; + itins.rr>, Sched<[WriteVecShift]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, i128mem:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, - (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>; + (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>, + Sched<[WriteVecShiftLd, ReadAfterLd]>; def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>; + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>, + Sched<[WriteVecShift]>; } /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types @@ -3667,14 +3776,16 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>; + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>; + (bitconvert (memop_frag addr:$src2)))))]>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -3779,7 +3890,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def VPSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3825,7 +3936,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 256-bit logical shifts. def VPSLLDQYri : PDIi8<0x73, MRM7r, (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), @@ -3871,7 +3982,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P>; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def PSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3966,14 +4077,15 @@ let Predicates = [HasAVX] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX; + IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX2] in { @@ -3983,14 +4095,15 @@ let Predicates = [HasAVX2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, VEX_L; + IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L, + Sched<[WriteShuffleLd]>; } let Predicates = [UseSSE2] in { @@ -4000,14 +4113,15 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>; + IIC_SSE_PSHUF>, Sched<[WriteShuffle]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, + Sched<[WriteShuffleLd]>; } } } // ExeDomain = SSEPackedInt @@ -4043,7 +4157,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -4052,7 +4166,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, [(set VR128:$dst, (OpNode VR128:$src1, (bc_frag (memopv2i64 addr:$src2))))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, @@ -4060,12 +4175,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, def Yrr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>; + [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, + Sched<[WriteShuffle]>; def Yrm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (OpNode VR256:$src1, - (bc_frag (memopv4i64 addr:$src2))))]>; + (bc_frag (memopv4i64 addr:$src2))))]>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4142,7 +4259,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>; + (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffle]>; def rmi : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), @@ -4151,7 +4269,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), - imm:$src3))], IIC_SSE_PINSRW>; + imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } // Extract @@ -4160,12 +4279,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, TB, OpSize, VEX; + imm:$src2))]>, TB, OpSize, VEX, + Sched<[WriteShuffle]>; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))], IIC_SSE_PEXTRW>; + imm:$src2))], IIC_SSE_PEXTRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; // Insert let Predicates = [HasAVX] in { @@ -4173,7 +4294,7 @@ let Predicates = [HasAVX] in { def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, TB, OpSize, VEX_4V; + []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst" in @@ -4185,7 +4306,7 @@ let Constraints = "$src1 = $dst" in // SSE2 - Packed Mask Creation //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -4213,7 +4334,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), // SSE2 - Conditional Store //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { let Uses = [EDI] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), @@ -4252,41 +4373,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteMove]>; def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>; + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, + Sched<[WriteMove]>; def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; //===---------------------------------------------------------------------===// // Move Int Doubleword to Single Scalar @@ -4294,22 +4416,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int @@ -4317,26 +4439,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, + Sched<[WriteMove]>; def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, + Sched<[WriteMove]>; def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int first element to Doubleword Int // +let SchedRW = [WriteMove] in { def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "vmov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), @@ -4349,6 +4474,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; +} //SchedRW //===---------------------------------------------------------------------===// // Bitcast FR64 <-> GR64 @@ -4357,28 +4483,28 @@ let Predicates = [HasAVX] in def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Move Scalar Single to Double Int @@ -4386,23 +4512,24 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>, VEX; + IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>; def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Patterns and instructions to describe movd/movq to XMM register zero-extends // +let SchedRW = [WriteMove] in { let AddedComplexity = 15 in { def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", @@ -4428,8 +4555,9 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), (v2i64 (scalar_to_vector GR64:$src)))))], IIC_SSE_MOVDQ>; } +} // SchedRW -let AddedComplexity = 20 in { +let AddedComplexity = 20, SchedRW = [WriteLoad] in { def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4442,7 +4570,7 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))], IIC_SSE_MOVDQ>; -} +} // AddedComplexity, SchedRW let Predicates = [HasAVX] in { // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. @@ -4491,6 +4619,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}", //===---------------------------------------------------------------------===// // Move Quadword Int to Packed Quadword Int // + +let SchedRW = [WriteLoad] in { def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4502,10 +4632,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (scalar_to_vector (loadi64 addr:$src))))], IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix +} // SchedRW //===---------------------------------------------------------------------===// // Move Packed Quadword Int to Quadword Int // +let SchedRW = [WriteStore] in { def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), @@ -4516,17 +4648,19 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>; +} // SchedRW //===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. // def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX, + Sched<[WriteStore]>; def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; let AddedComplexity = 20 in def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4535,7 +4669,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, VEX, Requires<[HasAVX]>; + XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>; let AddedComplexity = 20 in def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4544,7 +4678,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, Requires<[UseSSE2]>; + XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>; let Predicates = [HasAVX], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), @@ -4574,6 +4708,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)), // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. // +let SchedRW = [WriteVecLogic] in { let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4586,7 +4721,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], IIC_SSE_MOVQ_RR>, XS, Requires<[UseSSE2]>; +} // SchedRW +let SchedRW = [WriteVecLogicLd] in { let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4602,6 +4739,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; } +} // SchedRW let AddedComplexity = 20 in { let Predicates = [HasAVX] in { @@ -4619,6 +4757,7 @@ let AddedComplexity = 20 in { } // Instructions to match in the assembler +let SchedRW = [WriteMove] in { def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; @@ -4629,16 +4768,19 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; +} // SchedRW // Instructions for the disassembler // xr = XMM register // xm = mem64 +let SchedRW = [WriteMove] in { let Predicates = [HasAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS; +} // SchedRW //===---------------------------------------------------------------------===// // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP @@ -4649,11 +4791,11 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (vt (OpNode RC:$src)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (OpNode (mem_frag addr:$src)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4709,25 +4851,27 @@ multiclass sse3_replicate_dfp<string OpcodeStr> { let neverHasSideEffects = 1 in def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [], IIC_SSE_MOV_LH>; + [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (X86Movddup (scalar_to_vector (loadf64 addr:$src)))))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } // FIXME: Merge with above classe when there're patterns for the ymm version multiclass sse3_replicate_dfp_y<string OpcodeStr> { def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>; + [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>, + Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (X86Movddup - (scalar_to_vector (loadf64 addr:$src)))))]>; + (scalar_to_vector (loadf64 addr:$src)))))]>, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4775,6 +4919,7 @@ let Predicates = [UseSSE3] in { // SSE3 - Move Unaligned Integer //===---------------------------------------------------------------------===// +let SchedRW = [WriteLoad] in { let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", @@ -4788,6 +4933,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))], IIC_SSE_LDDQU>; +} //===---------------------------------------------------------------------===// // SSE3 - Arithmetic @@ -4801,13 +4947,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : I<0xD0, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>; + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4844,14 +4992,15 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], - IIC_SSE_HADDSUB_RM>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { @@ -4859,14 +5008,15 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], - IIC_SSE_HADDSUB_RM>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4915,7 +5065,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -4923,7 +5073,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (IntId128 (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>, - OpSize; + OpSize, Sched<[WriteVecALULd]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. @@ -4933,14 +5083,15 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 VR256:$src))]>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 - (bitconvert (memopv4i64 addr:$src))))]>, OpSize; + (bitconvert (memopv4i64 addr:$src))))]>, OpSize, + Sched<[WriteVecALULd]>; } let Predicates = [HasAVX] in { @@ -4972,6 +5123,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecALU in { def SSE_PHADDSUBD : OpndItins< IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM >; @@ -4981,12 +5133,16 @@ def SSE_PHADDSUBSW : OpndItins< def SSE_PHADDSUBW : OpndItins< IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM >; +} +let Sched = WriteShuffle in def SSE_PSHUFB : OpndItins< IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM >; +let Sched = WriteVecALU in def SSE_PSIGN : OpndItins< IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM >; +let Sched = WriteVecIMul in def SSE_PMULHRSW : OpndItins< IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW >; @@ -5003,7 +5159,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, @@ -5011,7 +5167,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, - (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize; + (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. @@ -5025,7 +5182,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -5033,7 +5190,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, @@ -5175,7 +5333,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), @@ -5183,7 +5341,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5193,13 +5351,13 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> { (ins VR256:$src1, VR256:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5247,6 +5405,7 @@ def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), // SSSE3 - Thread synchronization //===---------------------------------------------------------------------===// +let SchedRW = [WriteSystem] in { let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, @@ -5260,6 +5419,7 @@ let Uses = [ECX, EAX] in def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>, TB, Requires<[HasSSE3]>; +} // SchedRW def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 3caa1b538c..053417ccde 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +let SchedRW = [WriteSystem] in { let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>, TB; @@ -35,6 +36,7 @@ let Uses = [EFLAGS] in def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))], IIC_INT3>; +} // SchedRW def : Pat<(debugtrap), (INT3)>; @@ -43,6 +45,7 @@ def : Pat<(debugtrap), // FIXME: This doesn't work because InstAlias can't match immediate constants. //def : InstAlias<"int\t$3", (INT3)>; +let SchedRW = [WriteSystem] in { def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", [(int_x86_int imm:$trap)], IIC_INT>; @@ -65,11 +68,13 @@ def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>; def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>, Requires<[In64BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // Input/Output Instructions. // +let SchedRW = [WriteSystem] in { let Defs = [AL], Uses = [DX] in def IN8rr : I<0xEC, RawFrm, (outs), (ins), "in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>; @@ -113,10 +118,12 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>; def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize; def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>; +} // SchedRW //===----------------------------------------------------------------------===// // Moves to and from debug registers +let SchedRW = [WriteSystem] in { def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB; def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), @@ -126,10 +133,12 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Moves to and from control registers +let SchedRW = [WriteSystem] in { def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB; def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), @@ -139,6 +148,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Segment override instruction prefixes @@ -155,6 +165,7 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; // Moves to and from segment registers. // +let SchedRW = [WriteMove] in { def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize; def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), @@ -182,10 +193,12 @@ def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; +} // SchedRW //===----------------------------------------------------------------------===// // Segmentation support instructions. +let SchedRW = [WriteSystem] in { def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB; def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), @@ -347,10 +360,12 @@ def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", [], IIC_VERW_MEM>, TB; def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", [], IIC_VERW_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Descriptor-table support instructions +let SchedRW = [WriteSystem] in { def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), @@ -385,9 +400,11 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), "lldt{w}\t$src", [], IIC_LLDT_REG>, TB; def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), "lldt{w}\t$src", [], IIC_LLDT_MEM>, TB; - +} // SchedRW + //===----------------------------------------------------------------------===// // Specialized register support +let SchedRW = [WriteSystem] in { def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB; def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB; def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB; @@ -410,14 +427,18 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), "lmsw{w}\t$src", [], IIC_LMSW_REG>, TB; def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Cache instructions +let SchedRW = [WriteSystem] in { def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // XSAVE instructions +let SchedRW = [WriteSystem] in { let Defs = [RDX, RAX], Uses = [RCX] in def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; @@ -438,6 +459,7 @@ let Uses = [RDX, RAX] in { def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; } +} // SchedRW //===----------------------------------------------------------------------===// // VIA PadLock crypto instructions diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 3af1b3e06b..a8a9fd8acc 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -407,6 +407,57 @@ ReSimplify: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; + // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B + // if one of the registers is extended, but other isn't. + case X86::VMOVAPDrr: + case X86::VMOVAPDYrr: + case X86::VMOVAPSrr: + case X86::VMOVAPSYrr: + case X86::VMOVDQArr: + case X86::VMOVDQAYrr: + case X86::VMOVDQUrr: + case X86::VMOVDQUYrr: + case X86::VMOVUPDrr: + case X86::VMOVUPDYrr: + case X86::VMOVUPSrr: + case X86::VMOVUPSYrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; + case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; + case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; + case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; + case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; + case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; + case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; + case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; + case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; + case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; + case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; + case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + case X86::VMOVSDrr: + case X86::VMOVSSrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; + case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register // inputs modeled as normal uses instead of implicit uses. As such, truncate // off all but the first operand (the callee). FIXME: Change isel. diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index d99d085298..bcdd0eb56d 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -7,6 +7,88 @@ // //===----------------------------------------------------------------------===// +// InstrSchedModel annotations for out-of-order CPUs. +// +// These annotations are independent of the itinerary classes defined below. + +// Instructions with folded loads need to read the memory operand immediately, +// but other register operands don't have to be read until the load is ready. +// These operands are marked with ReadAfterLd. +def ReadAfterLd : SchedRead; + +// Instructions with both a load and a store folded are modeled as a folded +// load + WriteRMW. +def WriteRMW : SchedWrite; + +// Most instructions can fold loads, so almost every SchedWrite comes in two +// variants: With and without a folded load. +// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite +// with a folded load. +class X86FoldableSchedWrite : SchedWrite { + // The SchedWrite to use when a load is folded into the instruction. + SchedWrite Folded; +} + +// Multiclass that produces a linked pair of SchedWrites. +multiclass X86SchedWritePair { + // Register-Memory operation. + def Ld : SchedWrite; + // Register-Register operation. + def NAME : X86FoldableSchedWrite { + let Folded = !cast<SchedWrite>(NAME#"Ld"); + } +} + +// Arithmetic. +defm WriteALU : X86SchedWritePair; // Simple integer ALU op. +defm WriteIMul : X86SchedWritePair; // Integer multiplication. +defm WriteIDiv : X86SchedWritePair; // Integer division. +def WriteLEA : SchedWrite; // LEA instructions can't fold loads. + +// Integer shifts and rotates. +defm WriteShift : X86SchedWritePair; + +// Loads, stores, and moves, not folded with other operations. +def WriteLoad : SchedWrite; +def WriteStore : SchedWrite; +def WriteMove : SchedWrite; + +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +def WriteZero : SchedWrite; + +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +defm WriteJump : X86SchedWritePair; + +// Floating point. This covers both scalar and vector operations. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal. + +// Vector integer operations. +defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. +defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. +defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. + +// Vector bitwise operations. +// These are often used on both floating point and integer vectors. +defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor. +defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends. + +// Conversion between integer and float. +defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer. +defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. +defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. + +// Catch-all for expensive system instructions. +def WriteSystem : SchedWrite; + +// Old microcoded instructions that nobody use. +def WriteMicrocoded : SchedWrite; + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for X86 def IIC_DEFAULT : InstrItinClass; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index be2a997b8e..3e3b86edbb 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -169,6 +169,29 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + static const CostTblEntry<MVT> AVX2CostTable[] = { + // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to + // customize them to detect the cases where shift amount is a scalar one. + { ISD::SHL, MVT::v4i32, 1 }, + { ISD::SRL, MVT::v4i32, 1 }, + { ISD::SRA, MVT::v4i32, 1 }, + { ISD::SHL, MVT::v8i32, 1 }, + { ISD::SRL, MVT::v8i32, 1 }, + { ISD::SRA, MVT::v8i32, 1 }, + { ISD::SHL, MVT::v2i64, 1 }, + { ISD::SRL, MVT::v2i64, 1 }, + { ISD::SHL, MVT::v4i64, 1 }, + { ISD::SRL, MVT::v4i64, 1 }, + }; + + // Look for AVX2 lowering tricks. + if (ST->hasAVX2()) { + int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable), + ISD, LT.second); + if (Idx != -1) + return LT.first * AVX2CostTable[Idx].Cost; + } + static const CostTblEntry<MVT> AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized // operations and we only need to extract the upper YMM half. @@ -257,8 +280,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; |