aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorScott Michel <scottm@aero.org>2009-01-15 04:41:47 +0000
committerScott Michel <scottm@aero.org>2009-01-15 04:41:47 +0000
commit94bd57e154088f2d45c465e73f896f64f6da4ade (patch)
tree54cfa11f05e8040e9d7d18949bbeb5d23ecfce5e
parentf9b1d79a549269f0630edd4893c654953ae07a6a (diff)
- Convert remaining i64 custom lowering into custom instruction emission
sequences in SPUDAGToDAGISel.cpp and SPU64InstrInfo.td, killing custom DAG node types as needed. - i64 mul is now a legal instruction, but emits an instruction sequence that stretches tblgen and the imagination, as well as violating laws of several small countries and most southern US states (just kidding, but looking at a function with 80+ parameters is really weird and just plain wrong.) - Update tests as needed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62254 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/CellSPU/SPU64InstrInfo.td143
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp112
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp172
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.h25
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td40
-rw-r--r--lib/Target/CellSPU/SPUMathInstr.td2
-rw-r--r--lib/Target/CellSPU/SPUNodes.td24
-rw-r--r--test/CodeGen/CellSPU/i64ops.ll17
-rw-r--r--test/CodeGen/CellSPU/useful-harnesses/i64operations.c32
9 files changed, 373 insertions, 194 deletions
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 74c0ecad7f..cb8b48bc1f 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -2,7 +2,6 @@
//
// Cell SPU 64-bit operations
//
-// Primary author: Scott Michel (scottm@aero.org)
//===----------------------------------------------------------------------===//
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@@ -240,3 +239,145 @@ def : Pat<(setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
// i64 setult:
def : I64SETCCNegCond<setlt, I64GEr64>;
def : I64SELECTNegCond<setlt, I64GEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 add
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Carry generation: CGv4i32 applied to the two addends.
+class v2i64_add_cg<dag lhs, dag rhs>:
+ CodeFrag<(CGv4i32 lhs, rhs)>;
+
+// Extended add: ADDXv4i32 of lhs and rhs whose third operand is the carry
+// vector cg after it has been rearranged by SHUFBv4i32 under cg_mask.
+class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
+ CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
+
+// Complete add fragment: generates the carry from lhs/rhs with v2i64_add_cg
+// and feeds it, together with cg_mask, to v2i64_add_1.
+class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
+ v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
+
+// i64 add marker on R64C registers: both operands are wrapped in
+// ORv2i64_i64 and the v2i64_add result in ORi64_v2i64 (scalar<->vector
+// conversions, judging by the instruction names -- confirm in SPUInstrInfo.td).
+def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+// v2i64 add marker: the vector operands are passed to v2i64_add directly.
+def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_add<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 subtraction
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Borrow generation: BGv4i32 applied to the two operands.
+class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
+
+// Extended subtract: SFXv4i32 of lhs and rhs whose third operand is the
+// borrow vector bg after it has been rearranged by SHUFBv4i32 under bg_mask.
+// Unlike v2i64_add, the borrow fragment is supplied by the caller.
+class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
+ CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
+
+// i64 subtract marker on R64C registers: operands are wrapped in ORv2i64_i64
+// for both the borrow generation and the extended subtract, and the result is
+// wrapped in ORi64_v2i64.
+def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+// v2i64 subtract marker: the vector operands are used directly.
+def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_sub<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ v2i64_sub_bg<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 multiply
+//
+// Note: i64 multiply is simply the vector->scalar conversion of the
+// full-on v2i64 multiply, since the entire vector has to be manipulated
+// anyway.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Operand-preparation fragments for the 64-bit multiply.
+//
+// The "hi64"/"lo64" fragments combine the operand with an all-zero vector
+// (ILv4i32 0) through SELBv4i32 under an FSMBI byte mask: 0x0f0f for "hi64"
+// and 0xf0f0 for "lo64". The "shlq2"/"shlq4" fragments apply SHLQBYIv4i32
+// with an immediate of 2 or 4 to the operand.
+
+class v2i64_mul_ahi64<dag rA> :
+ CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_bhi64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+// Parameter is named rA (not rB) to match how every user instantiates this
+// fragment and to stay parallel with v2i64_mul_ahi64.
+class v2i64_mul_alo64<dag rA> :
+ CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_blo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_ashlq2<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
+
+class v2i64_mul_ashlq4<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
+
+class v2i64_mul_bshlq2<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
+
+class v2i64_mul_bshlq4<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
+
+// Sum, through nested Av4i32 adds, of seven MPYU/MPYH partial products formed
+// from the masked (hi64) and byte-shifted (shlq2/shlq4) operand fragments.
+// v2i64_mul masks this result with FSMBI 0x0f0f before adding it to the low
+// sum.
+class v2i64_highprod<dag rA, dag rB>:
+ CodeFrag<(Av4i32
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3
+ v2i64_mul_ahi64<rA>.Fragment),
+ (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3
+ v2i64_mul_bshlq4<rB>.Fragment)),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
+ v2i64_mul_ashlq4<rA>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment),
+ (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment))))))>;
+
+// MPYU of the lo64 fragments of rA and rB.
+class v2i64_mul_a3_b3<dag rA, dag rB>:
+ CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_blo64<rB>.Fragment)>;
+
+// MPYHHU of rA's lo64 fragment with rB shifted by SHLQBYI 2; the product is
+// shifted by a further SHLQBYI 2 and then merged with zero through SELB under
+// an FSMBI 0xc3c3 byte mask.
+class v2i64_mul_a2_b3<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+// Same construction as v2i64_mul_a2_b3 with the roles of rA and rB exchanged.
+class v2i64_mul_a3_b2<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
+ v2i64_mul_ashlq2<rA>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+// Sum of the a3_b3, a2_b3 and a3_b2 partial products, built from two chained
+// v2i64_add fragments that share the same carry-generate shuffle mask.
+class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
+ v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
+ v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
+
+// Full multiply fragment: v2i64_lowsum plus v2i64_highprod, where the high
+// product is first merged with zero through SELB under an FSMBI 0x0f0f mask.
+class v2i64_mul<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
+ (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
+ (ILv4i32 0),
+ (FSMBIv4i32 0x0f0f)),
+ rCGmask>;
+
+// i64 multiply marker on R64C registers: operands are wrapped in ORv2i64_i64
+// and the v2i64_mul result in ORi64_v2i64.
+def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+// v2i64 multiply marker: the vector operands are used directly.
+def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 76b2284369..1f00bacb5e 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -18,11 +18,13 @@
#include "SPUHazardRecognizers.h"
#include "SPUFrameInfo.h"
#include "SPURegisterNames.h"
+#include "SPUTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Constants.h"
@@ -254,6 +256,26 @@ public:
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
}
+ //! Emit a constant BUILD_VECTOR as a load from the constant pool
+ /*!
+ Collects the operands of \a build_vec into a ConstantVector, places it in
+ the constant pool, lowers the pool address with SPU::LowerConstantPool and
+ runs instruction selection on the resulting load.
+
+ \param build_vec A BUILD_VECTOR whose operands are all ConstantSDNodes
+ \return The node produced by SelectCode for the constant-pool load
+ */
+ SDNode *emitBuildVector(SDValue build_vec) {
+ std::vector<Constant*> CV;
+
+ for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
+ // cast<> rather than dyn_cast<>: the result is dereferenced
+ // unconditionally, so a non-constant operand must trip an assertion
+ // instead of becoming a null-pointer dereference.
+ ConstantSDNode *V = cast<ConstantSDNode>(build_vec.getOperand(i));
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ }
+
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
+ // NOTE(review): this treats getAlignment() as a log2 value -- confirm.
+ unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue CGPoolOffset =
+ SPU::LowerConstantPool(CPIdx, *CurDAG,
+ SPUtli.getSPUTargetMachine());
+ return SelectCode(CurDAG->getLoad(build_vec.getValueType(),
+ CurDAG->getEntryNode(), CGPoolOffset,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment));
+ }
+
/// Select - Convert the specified operand from a target-independent to a
/// target-specific node if it hasn't already been changed.
SDNode *Select(SDValue Op);
@@ -647,22 +669,82 @@ SPUDAGToDAGISel::Select(SDValue Op) {
TFI, Imm0), 0);
n_ops = 2;
}
- } else if (Opc == ISD::ZERO_EXTEND) {
- // (zero_extend:i16 (and:i8 <arg>, <const>))
- const SDValue &Op1 = N->getOperand(0);
-
- if (Op.getValueType() == MVT::i16 && Op1.getValueType() == MVT::i8) {
- if (Op1.getOpcode() == ISD::AND) {
- // Fold this into a single ANDHI. This is often seen in expansions of i1
- // to i8, then i8 to i16 in logical/branching operations.
- DEBUG(cerr << "CellSPU: Coalescing (zero_extend:i16 (and:i8 "
- "<arg>, <const>))\n");
- NewOpc = SPU::ANDHIi8i16;
- Ops[0] = Op1.getOperand(0);
- Ops[1] = Op1.getOperand(1);
- n_ops = 2;
- }
+ } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
+ && OpVT == MVT::i64) {
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
+ MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
+ MVT OpVecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue shufMask;
+
+ switch (Op0VT.getSimpleVT()) {
+ default:
+ cerr << "CellSPU Select: Unhandled zero/any extend MVT\n";
+ abort();
+ /*NOTREACHED*/
+ break;
+ case MVT::i32:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x00010203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x08090a0b, MVT::i32));
+ break;
+
+ case MVT::i16:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800a0b, MVT::i32));
+ break;
+
+ case MVT::i8:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80808003, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x8080800b, MVT::i32));
+ break;
}
+
+ SDNode *shufMaskLoad = emitBuildVector(shufMask);
+ SDNode *PromoteScalar =
+ SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0));
+
+ SDValue zextShuffle =
+ CurDAG->getNode(SPUISD::SHUFB, OpVecVT,
+ SDValue(PromoteScalar, 0),
+ SDValue(PromoteScalar, 0),
+ SDValue(shufMaskLoad, 0));
+
+ // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
+ // re-use it in the VEC2PREFSLOT selection without needing to explicitly
+ // call SelectCode (it's already done for us.)
+ SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, OpVecVT, zextShuffle));
+ return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, OpVT,
+ zextShuffle));
+ } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
+
+ return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
+ } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG));
+
+ return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
+ } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
+
+ return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
} else if (Opc == ISD::SHL) {
if (OpVT == MVT::i64) {
return SelectSHLi64(Op, OpVT);
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 9dd9855850..92bd92886c 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -78,6 +78,7 @@ namespace {
return retval;
}
+
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
@@ -208,13 +209,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Custom lower i8, i32 and i64 multiplications
setOperationAction(ISD::MUL, MVT::i8, Custom);
setOperationAction(ISD::MUL, MVT::i32, Legal);
- setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i8, Custom);
- setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::SUB, MVT::i8, Custom);
- setOperationAction(ISD::SUB, MVT::i64, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Legal);
// SPU does not have BSWAP. It does have i32 support CTLZ.
// CTPOP has to be custom lowered.
@@ -243,11 +244,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::i32, Legal);
setOperationAction(ISD::SETCC, MVT::i64, Legal);
- // Zero extension and sign extension for i64 have to be
- // custom legalized
- setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
-
// Custom lower i128 -> i64 truncates
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
@@ -416,10 +412,9 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
- node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
- node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
- node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
- node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
+ node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
+ node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
+ node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
}
std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
@@ -778,8 +773,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
return SDValue();
}
-/// Generate the address of a constant pool entry.
-static SDValue
+//! Generate the address of a constant pool entry.
+SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
MVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
@@ -805,6 +800,12 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
return SDValue();
}
+//! Public wrapper around the file-local LowerConstantPool: takes the target
+//! machine and forwards its subtarget.
+SDValue
+SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetMachine &TM) {
+ const SPUSubtarget *ST = TM.getSubtargetImpl();
+ return ::LowerConstantPool(Op, DAG, ST);
+}
+
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
MVT PtrVT = Op.getValueType();
@@ -2185,123 +2186,34 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
return SDValue();
}
-static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
-{
- MVT VT = Op.getValueType();
- MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+//! Generate the carry-generate shuffle mask.
+SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
+SmallVector<SDValue, 16> ShufBytes;
- SDValue Op0 = Op.getOperand(0);
+// Create the shuffle mask for "rotating" the carry up one register slot
+// once the carry is generated.
+ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
- switch (Opc) {
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND: {
- MVT Op0VT = Op0.getValueType();
- MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
-
- SDValue PromoteScalar =
- DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
-
- // Use a shuffle to zero extend the i32 to i64 directly:
- SDValue shufMask;
-
- switch (Op0VT.getSimpleVT()) {
- default:
- cerr << "CellSPU LowerI64Math: Unhandled zero/any extend MVT\n";
- abort();
- /*NOTREACHED*/
- break;
- case MVT::i32:
- shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x00010203, MVT::i32),
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x08090a0b, MVT::i32));
- break;
-
- case MVT::i16:
- shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x80800203, MVT::i32),
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x80800a0b, MVT::i32));
- break;
-
- case MVT::i8:
- shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x80808003, MVT::i32),
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x8080800b, MVT::i32));
- break;
- }
-
- SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
- PromoteScalar, PromoteScalar, shufMask);
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
- DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
- }
+return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+}
- case ISD::ADD: {
- // Turn operands into vectors to satisfy type checking (shufb works on
- // vectors)
- SDValue Op0 =
- DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
- SDValue Op1 =
- DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
- SmallVector<SDValue, 16> ShufBytes;
-
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-
- SDValue CarryGen =
- DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
- SDValue ShiftedCarry =
- DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
- CarryGen, CarryGen,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
- DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
- Op0, Op1, ShiftedCarry));
- }
+//! Generate the borrow-generate shuffle mask
+SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
+SmallVector<SDValue, 16> ShufBytes;
- case ISD::SUB: {
- // Turn operands into vectors to satisfy type checking (shufb works on
- // vectors)
- SDValue Op0 =
- DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
- SDValue Op1 =
- DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
- SmallVector<SDValue, 16> ShufBytes;
-
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
-
- SDValue BorrowGen =
- DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
- SDValue ShiftedBorrow =
- DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
- BorrowGen, BorrowGen,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
- DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
- Op0, Op1, ShiftedBorrow));
- }
- }
+// Create the shuffle mask for "rotating" the borrow up one register slot
+// once the borrow is generated.
+ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
- return SDValue();
+return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
}
//! Lower byte immediate operations for v16i8 vectors:
@@ -2576,11 +2488,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::RET:
return LowerRET(Op, DAG, getTargetMachine());
-
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- return LowerI64Math(Op, DAG, Opc);
-
// i8, i64 math ops:
case ISD::ADD:
case ISD::SUB:
@@ -2591,8 +2498,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::SRA: {
if (VT == MVT::i8)
return LowerI8Math(Op, DAG, Opc, *this);
- else if (VT == MVT::i64)
- return LowerI64Math(Op, DAG, Opc);
break;
}
@@ -2831,6 +2736,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
break;
}
}
+
// Otherwise, return unchanged.
#ifndef NDEBUG
if (Result.getNode()) {
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 24b8f82ecb..a98a8f6bbe 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -52,10 +52,11 @@ namespace llvm {
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
SELB, ///< Select bits -> (b & mask) | (a & ~mask)
- ADD_EXTENDED, ///< Add extended, with carry
- CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED
- SUB_EXTENDED, ///< Subtract extended, with borrow
- BORROW_GENERATE, ///< Borrow generate for SUB_EXTENDED
+ // Markers: These aren't used to generate target-dependent nodes, but
+ // are used during instruction selection.
+ ADD64_MARKER, ///< i64 addition marker
+ SUB64_MARKER, ///< i64 subtraction marker
+ MUL64_MARKER, ///< i64 multiply marker
LAST_SPUISD ///< Last user-defined instruction
};
}
@@ -74,6 +75,12 @@ namespace llvm {
MVT ValueType);
SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
+
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetMachine &TM);
+
+ SDValue getBorrowGenerateShufMask(SelectionDAG &DAG);
+ SDValue getCarryGenerateShufMask(SelectionDAG &DAG);
}
class SPUTargetMachine; // forward dec'l.
@@ -86,8 +93,18 @@ namespace llvm {
SPUTargetMachine &SPUTM;
public:
+ //! The venerable constructor
+ /*!
+ This is where the CellSPU backend sets operation handling (i.e., legal,
+ custom, expand or promote.)
+ */
SPUTargetLowering(SPUTargetMachine &TM);
+ //! Get the target machine
+ SPUTargetMachine &getSPUTargetMachine() {
+ return SPUTM;
+ }
+
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index b9956402d9..b639ec254a 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -705,17 +705,14 @@ class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
class ADDXVecInst<ValueType vectype>:
ADDXInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
- [(set (vectype VECREG:$rT),
- (SPUaddx (vectype VECREG:$rA), (vectype VECREG:$rB),
- (vectype VECREG:$rCarry)))]>,
+ [/* no pattern */]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
class ADDXRegInst<RegisterClass rclass>:
ADDXInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB, rclass:$rCarry),
- [(set rclass:$rT,
- (SPUaddx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
+ [/* no pattern */]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
@@ -737,14 +734,12 @@ class CGInst<dag OOL, dag IOL, list<dag> pattern>:
class CGVecInst<ValueType vectype>:
CGInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (SPUcarry_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+ [/* no pattern */]>;
class CGRegInst<RegisterClass rclass>:
CGInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT,
- (SPUcarry_gen rclass:$rA, rclass:$rB))]>;
+ [/* no pattern */]>;
multiclass CarryGenerate {
def v2i64 : CGVecInst<v2i64>;
@@ -765,17 +760,14 @@ class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
class SFXVecInst<ValueType vectype>:
SFXInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
- [(set (vectype VECREG:$rT),
- (SPUsubx (vectype VECREG:$rA), (vectype VECREG:$rB),
- (vectype VECREG:$rCarry)))]>,
+ [/* no pattern */]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
class SFXRegInst<RegisterClass rclass>:
SFXInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB, rclass:$rCarry),
- [(set rclass:$rT,
- (SPUsubx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
+ [/* no pattern */]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
@@ -797,14 +789,12 @@ class BGInst<dag OOL, dag IOL, list<dag> pattern>:
class BGVecInst<ValueType vectype>:
BGInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (SPUborrow_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+ [/* no pattern */]>;
class BGRegInst<RegisterClass rclass>:
BGInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT,
- (SPUborrow_gen rclass:$rA, rclass:$rB))]>;
+ [/* no pattern */]>;
multiclass BorrowGenerate {
def v4i32 : BGVecInst<v4i32>;
@@ -894,7 +884,7 @@ class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
"mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
pattern>;
-def MPYAvec:
+def MPYAv4i32:
MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
[(set (v4i32 VECREG:$rT),
(add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
@@ -939,7 +929,7 @@ class MPYSInst<dag OOL, dag IOL>:
"mpys\t$rT, $rA, $rB", IntegerMulDiv,
[/* no pattern */]>;
-def MPYSvec:
+def MPYSv4i32:
MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
def MPYSr16:
@@ -972,14 +962,20 @@ def MPYHHAvec:
def MPYHHAr32:
MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-// mpyhhu: Multiply high-high, unsigned
+// mpyhhu: Multiply high-high, unsigned, e.g.:
+//
+// +-------+-------+ +-------+-------+ +---------+
+// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
+// +-------+-------+ +-------+-------+ +---------+
+//
+// where a0, b0 are the upper 16 bits of the 32-bit word
class MPYHHUInst<dag OOL, dag IOL>:
RRForm<0b01110011110, OOL, IOL,
"mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
[/* no pattern */]>;
-def MPYHHUvec:
+def MPYHHUv4i32:
MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
def MPYHHUr32:
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
index 38279a0a9f..64548fd8c0 100644
--- a/lib/Target/CellSPU/SPUMathInstr.td
+++ b/lib/Target/CellSPU/SPUMathInstr.td
@@ -8,8 +8,6 @@
//
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
// purely and completely coincidental.
-//
-// Primary author: Scott Michel (scottm@aero.org)
//===----------------------------------------------------------------------===//
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index cae6023cd4..87c4115d1b 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -61,18 +61,20 @@ def SPUselb_type: SDTypeProfile<1, 3, [
def SPUvecshift_type: SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
+// "marker" type for i64 operators that need a shuffle mask
+// (i.e., uses cg or bg or another instruction that needs to
+// use shufb to get things in the right place.)
+// Op0: The result
+// Op1, 2: LHS, RHS
+// Op3: Carry-generate shuffle mask
+
+def SPUmarker_type : SDTypeProfile<1, 3, [
+ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
+
//===----------------------------------------------------------------------===//
// Synthetic/pseudo-instructions
//===----------------------------------------------------------------------===//
-/// Add extended, carry generate:
-def SPUaddx : SDNode<"SPUISD::ADD_EXTENDED", SPUIntTrinaryOp, []>;
-def SPUcarry_gen : SDNode<"SPUISD::CARRY_GENERATE", SDTIntBinOp, []>;
-
-// Subtract extended, borrow generate
-def SPUsubx : SDNode<"SPUISD::SUB_EXTENDED", SPUIntTrinaryOp, []>;
-def SPUborrow_gen : SDNode<"SPUISD::BORROW_GENERATE", SDTIntBinOp, []>;
-
// SPU CNTB:
def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
@@ -127,6 +129,12 @@ def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
+// i64 markers: supplies extra operands used to generate the i64 operator
+// instruction sequences
+def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
+def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
+def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
+
//===----------------------------------------------------------------------===//
// Constraints: (taken from PPCInstrInfo.td)
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll
index d118c5f88c..dd6782772a 100644
--- a/test/CodeGen/CellSPU/i64ops.ll
+++ b/test/CodeGen/CellSPU/i64ops.ll
@@ -2,9 +2,15 @@
; RUN: grep xswd %t1.s | count 3
; RUN: grep xsbh %t1.s | count 1
; RUN: grep xshw %t1.s | count 2
-; RUN: grep shufb %t1.s | count 4
-; RUN: grep cg %t1.s | count 1
-; RUN: grep addx %t1.s | count 1
+; RUN: grep shufb %t1.s | count 7
+; RUN: grep cg %t1.s | count 4
+; RUN: grep addx %t1.s | count 4
+; RUN: grep fsmbi %t1.s | count 3
+; RUN: grep il %t1.s | count 2
+; RUN: grep mpy %t1.s | count 10
+; RUN: grep mpyh %t1.s | count 6
+; RUN: grep mpyhhu %t1.s | count 2
+; RUN: grep mpyu %t1.s | count 4
; ModuleID = 'stores.bc'
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
@@ -44,3 +50,8 @@ define i64 @add_i64(i64 %a, i64 %b) nounwind {
%1 = add i64 %a, %b
ret i64 %1
}
+
+; 64-bit multiply of the two arguments; exercises the i64 mul instruction
+; sequence whose mpy*/fsmbi/shufb counts are checked by the RUN lines.
+define i64 @mul_i64(i64 %a, i64 %b) nounwind {
+ %1 = mul i64 %a, %b
+ ret i64 %1
+}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
index 7a4bf1ab0d..b613bd872e 100644
--- a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
+++ b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
@@ -7,6 +7,7 @@ int64_t tval_c = 1234567890001LL;
int64_t tval_d = 10001LL;
int64_t tval_e = 10000LL;
uint64_t tval_f = 0xffffff0750135eb9;
+int64_t tval_g = -1;
/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
@@ -546,6 +547,12 @@ test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), in
/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+/* Return the product of two signed 64-bit integers. */
+int64_t i64_mul(int64_t a, int64_t b) {
+ int64_t product = a * b;
+ return product;
+}
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+
int
main(void)
{
@@ -553,12 +560,13 @@ main(void)
const char *something_failed = " %d tests failed.\n";
const char *all_tests_passed = " All tests passed.\n";
- printf("tval_a = %20lld (0x%020llx)\n", tval_a, tval_a);
- printf("tval_b = %20lld (0x%020llx)\n", tval_b, tval_b);
- printf("tval_c = %20lld (0x%020llx)\n", tval_c, tval_c);
- printf("tval_d = %20lld (0x%020llx)\n", tval_d, tval_d);
- printf("tval_e = %20lld (0x%020llx)\n", tval_e, tval_e);
- printf("tval_f = %20llu (0x%020llx)\n", tval_f, tval_f);
+ printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a);
+ printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b);
+ printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c);
+ printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d);
+ printf("tval_e = %20lld (0x%016llx)\n", tval_e, tval_e);
+ printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f);
+ printf("tval_g = %20lld (0x%016llx)\n", tval_g, tval_g);
printf("----------------------------------------\n");
for (i = 0; i < ARR_SIZE(int64_preds); ++i) {
@@ -649,5 +657,17 @@ main(void)
printf("----------------------------------------\n");
+ int64_t result;
+
+ result = i64_mul(tval_g, tval_g);
+ printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result);
+ result = i64_mul(tval_d, tval_e);
+ printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result);
+ /* 0xba7a664f13077c9 */
+ result = i64_mul(tval_a, tval_b);
+ printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result);
+
+ printf("----------------------------------------\n");
+
return 0;
}