diff options
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 | ||||
-rw-r--r-- | test/CodeGen/X86/scalar-extract.ll | 13 |
2 files changed, 26 insertions, 5 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f8b914268c..230079b420 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4878,16 +4878,24 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { - // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) - // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) - // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) + // (vextract (scalar_to_vector val), 0) -> val + SDValue InVec = N->getOperand(0); + SDValue EltNo = N->getOperand(1); + + if (isa<ConstantSDNode>(EltNo)) { + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && Elt == 0) { + return InVec.getOperand(0); + } + } // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); - SDValue InVec = N->getOperand(0); - SDValue EltNo = N->getOperand(1); + // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) if (isa<ConstantSDNode>(EltNo)) { unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); diff --git a/test/CodeGen/X86/scalar-extract.ll b/test/CodeGen/X86/scalar-extract.ll new file mode 100644 index 0000000000..172c424a78 --- /dev/null +++ b/test/CodeGen/X86/scalar-extract.ll @@ -0,0 +1,13 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx -o %t -f +; RUN: not grep movq %t + +; Check that widening doesn't introduce a mmx register in this case when +; a simple load/store would suffice. 
+ +define void @foo(<2 x i16>* %A, <2 x i16>* %B) { +entry: + %tmp1 = load <2 x i16>* %A ; <<2 x i16>> [#uses=1] + store <2 x i16> %tmp1, <2 x i16>* %B + ret void +} + |