diff options
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 19 | ||||
-rw-r--r-- | lib/Target/X86/X86CodeEmitter.cpp | 20 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 15 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 6 | ||||
-rw-r--r-- | test/CodeGen/X86/avx2-intrinsics-x86.ll | 19 | ||||
-rw-r--r-- | utils/TableGen/X86RecognizableInstr.cpp | 26 | ||||
-rw-r--r-- | utils/TableGen/X86RecognizableInstr.h | 2 |
7 files changed, 77 insertions, 30 deletions
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 52506fa185..4a38324d08 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -572,8 +572,14 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // Classify VEX_B, VEX_4V, VEX_R, VEX_X unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1) + if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) ++CurOp; + else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!"); @@ -971,11 +977,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1) + if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) ++CurOp; - else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; + else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } // Keep track of the current byte being emitted. unsigned CurByte = 0; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 3079dfa7cf..977cc50c13 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -935,8 +935,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, // Classify VEX_B, VEX_4V, VEX_R, VEX_X unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1) + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) ++CurOp; + else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: // Duplicate register. @@ -1117,11 +1124,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, // If this is a two-address instruction, skip one of the register operands. unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1) + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) ++CurOp; - else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; + else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } uint64_t TSFlags = Desc->TSFlags; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 817013011c..5186482a0d 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1966,14 +1966,22 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { if (!Scale) return 0; + SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(), + MVT::Other); + // Memory Operands: Base, Scale, Index, Disp, Segment SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32); SDValue Segment = CurDAG->getRegister(0, MVT::i32); const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, Disp, Segment, VMask, Chain}; SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - VSrc.getValueType(), MVT::Other, - Ops, array_lengthof(Ops)); + VTs, Ops, array_lengthof(Ops)); + // Node has 2 outputs: VDst and MVT::Other. + // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. + // We replace VDst of Node with VDst of ResNode, and Other of Node with Other + // of ResNode. + ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2)); return ResNode; } @@ -2034,7 +2042,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } SDNode *RetVal = SelectGather(Node, Opc); if (RetVal) - return RetVal; + // We already called ReplaceUses inside SelectGather. + return NULL; break; } } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5e96eecf8a..0ad92413ff 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -8038,19 +8038,19 @@ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; // VGATHER - GATHER Operations multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256, X86MemOperand memop256> { - def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), + def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst, VR128:$mask_wb), (ins VR128:$src1, v128mem:$src2, VR128:$mask), !strconcat(OpcodeStr, "\t{$mask, $src2, $dst|$dst, $src2, $mask}"), []>, VEX_4VOp3; - def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst), + def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst, RC256:$mask_wb), (ins RC256:$src1, memop256:$src2, RC256:$mask), !strconcat(OpcodeStr, "\t{$mask, $src2, $dst|$dst, $src2, $mask}"), []>, VEX_4VOp3, VEX_L; } -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst, $mask = $mask_wb" in { defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, v128mem>, VEX_W; defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, v256mem>, VEX_W; defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, v256mem>; diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index 459dbb235a..a6141b0956 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1136,3 +1136,22 @@ define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1, } declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly + +; PR13298 +define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, + <8 x i32> %idx, <8 x float> %mask, + float* nocapture %out) { +; CHECK: test_gather_mask +; CHECK: vmovdqa %ymm2, [[DEST:%.*]] +; CHECK: vgatherdps [[DEST]] +;; gather with mask + %a_i8 = bitcast float* %a to i8* + %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, + i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4) ; + +;; for debugging, we'll just dump out the mask + %out_ptr = bitcast float * %out to <8 x float> * + store <8 x float> %mask, <8 x float> * %out_ptr, align 4 + + ret <8 x float> %res +} diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index 0ab21c8a54..1cc67c24e3 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -277,8 +277,8 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, } void RecognizableInstr::processInstr(DisassemblerTables &tables, - const CodeGenInstruction &insn, - InstrUID uid) + const CodeGenInstruction &insn, + InstrUID uid) { // Ignore "asm parser only" instructions. if (insn.TheDef->getValueAsBit("isAsmParserOnly")) @@ -508,13 +508,13 @@ bool RecognizableInstr::has256BitOperands() const { return false; } -void RecognizableInstr::handleOperand( - bool optional, - unsigned &operandIndex, - unsigned &physicalOperandIndex, - unsigned &numPhysicalOperands, - unsigned *operandMapping, - OperandEncoding (*encodingFromString)(const std::string&, bool hasOpSizePrefix)) { +void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex, + unsigned &physicalOperandIndex, + unsigned &numPhysicalOperands, + const unsigned *operandMapping, + OperandEncoding (*encodingFromString) + (const std::string&, + bool hasOpSizePrefix)) { if (optional) { if (physicalOperandIndex >= numPhysicalOperands) return; @@ -563,7 +563,6 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands; - unsigned operandIndex; unsigned numOperands = OperandList.size(); unsigned numPhysicalOperands = 0; @@ -575,12 +574,13 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { assert(numOperands <= X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough"); - for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) { + for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) { if (OperandList[operandIndex].Constraints.size()) { const CGIOperandList::ConstraintInfo &Constraint = OperandList[operandIndex].Constraints[0]; if (Constraint.isTied()) { - operandMapping[operandIndex] = Constraint.getTiedOperand(); + operandMapping[operandIndex] = operandIndex; + operandMapping[Constraint.getTiedOperand()] = operandIndex; } else { ++numPhysicalOperands; operandMapping[operandIndex] = operandIndex; @@ -621,7 +621,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { class##EncodingFromString); // operandIndex should always be < numOperands - operandIndex = 0; + unsigned operandIndex = 0; // physicalOperandIndex should always be < numPhysicalOperands unsigned physicalOperandIndex = 0; diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h index 6c0a234b5e..542e510c60 100644 --- a/utils/TableGen/X86RecognizableInstr.h +++ b/utils/TableGen/X86RecognizableInstr.h @@ -204,7 +204,7 @@ private: unsigned &operandIndex, unsigned &physicalOperandIndex, unsigned &numPhysicalOperands, - unsigned *operandMapping, + const unsigned *operandMapping, OperandEncoding (*encodingFromString) (const std::string&, bool hasOpSizePrefix)); |