aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2006-03-20 01:53:53 +0000
committerChris Lattner <sabre@nondot.org>2006-03-20 01:53:53 +0000
commitf1d0b2bedaa065972a5ba17259055c1176cd1497 (patch)
tree4003a9c75507e65ee931055125273f42bbf92e5d
parent2bc6dc27e952dd0225b3eb2f68d9195f8b415cec (diff)
Custom lower arbitrary VECTOR_SHUFFLE's to VPERM.
TODO: leave specific ones as VECTOR_SHUFFLE's and turn them into specialized operations like vsplt* git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26887 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp46
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h4
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td25
3 files changed, 60 insertions, 15 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 5a2acd46d9..258de780eb 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -167,6 +167,11 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
+
+ // FIXME: We don't support any BUILD_VECTOR's yet. We should custom expand
+ // the ones we do, like splat(0.0) and splat(-0.0).
+ setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
}
if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
@@ -179,11 +184,11 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
setOperationAction(ISD::LOAD , MVT::v4f32, Legal);
setOperationAction(ISD::ADD , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
- // FIXME: We don't support any BUILD_VECTOR's yet. We should custom expand
- // the ones we do!
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Expand);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Expand);
-
+ setOperationAction(ISD::LOAD , MVT::v16i8, Legal);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
}
@@ -209,6 +214,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
case PPCISD::LVE_X: return "PPCISD::LVE_X";
+ case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
@@ -566,6 +572,36 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
return DAG.getNode(PPCISD::LVE_X, Op.getValueType(), Store, FIdx,
DAG.getSrcValue(NULL));
}
+ case ISD::VECTOR_SHUFFLE: {
+ // FIXME: Cases that are handled by instructions that take permute
+ // immediates (such as vsplt*) shouldn't be lowered here! Also handle cases
+ // that are cheaper to do as multiple such instructions than as a constant
+ // pool load/vperm pair.
+
+ // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+ // vector that will get spilled to the constant pool.
+ SDOperand V1 = Op.getOperand(0);
+ SDOperand V2 = Op.getOperand(1);
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+ SDOperand PermMask = Op.getOperand(2);
+
+ // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
+ // that it is in input element units, not in bytes. Convert now.
+ MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
+ unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
+
+ std::vector<SDOperand> ResultMask;
+ for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
+ unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
+
+ for (unsigned j = 0; j != BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i8));
+ }
+
+ SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
+ return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
+ }
}
return SDOperand();
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 058f85bb07..1e883001a6 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -56,6 +56,10 @@ namespace llvm {
/// the third is the SRCVALUE node.
LVE_X,
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
/// Hi/Lo - These represent the high and low 16-bit parts of a global
/// address respectively. These nodes have two operands, the first of
/// which must be a TargetGlobalAddress, and the second of which must be a
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index a7899c8038..ce308ee533 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -26,6 +26,10 @@ def SDT_PPCShiftOp : SDTypeProfile<1, 2, [ // PPCshl, PPCsra, PPCsrl
def SDT_PPCCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
def SDT_PPCRetFlag : SDTypeProfile<0, 0, []>;
+def SDT_PPCvperm : SDTypeProfile<1, 3, [
+ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@@ -46,6 +50,7 @@ def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
def PPClve_x : SDNode<"PPCISD::LVE_X", SDTLoad, [SDNPHasChain]>;
+def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
// amounts. These nodes are generated by the multi-precision shift code.
@@ -118,15 +123,6 @@ def imm16Shifted : PatLeaf<(imm), [{
return ((unsigned)N->getValue() & 0xFFFF0000U) == (unsigned)N->getValue();
}], HI16>;
-/*
-// Example of a legalize expander: Only for PPC64.
-def : Expander<(set i64:$dst, (fp_to_sint f64:$src)),
- [(set f64:$tmp , (FCTIDZ f64:$src)),
- (set i32:$tmpFI, (CreateNewFrameIndex 8, 8)),
- (store f64:$tmp, i32:$tmpFI),
- (set i64:$dst, (load i32:$tmpFI))],
- Subtarget_PPC64>;
-*/
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@@ -956,7 +952,9 @@ def VNMSUBFP: VAForm_1<47, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
Requires<[FPContractions]>;
def VPERM : VAForm_1<43, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
- "vperm $vD, $vA, $vC, $vB", VecFP, []>;
+ "vperm $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD,
+ (PPCvperm (v4f32 VRRC:$vA), VRRC:$vB, VRRC:$vC))]>;
// VX-Form instructions. AltiVec arithmetic ops.
@@ -1153,6 +1151,13 @@ def : Pat<(f64 (extload xaddr:$src, f32)),
def : Pat<(v4i32 (load xoaddr:$src)),
(v4i32 (LVX xoaddr:$src))>;
+def : Pat<(v16i8 (load xoaddr:$src)),
+ (v16i8 (LVX xoaddr:$src))>;
+
+
+def : Pat<(PPCvperm (v4i32 VRRC:$vA), VRRC:$vB, VRRC:$vC),
+ (v4i32 (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC))>;
+
def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
(STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
def : Pat<(v4i32 (PPClve_x xoaddr:$src)),