diff options
author | Dan Gohman <gohman@apple.com> | 2008-04-16 02:32:24 +0000 |
---|---|---|
committer | Dan Gohman <gohman@apple.com> | 2008-04-16 02:32:24 +0000 |
commit | 171c11ec93d74c71f7e4f8bfb9c9cd5971214b53 (patch) | |
tree | 65821acd32435f1829eae55dce15522acd044695 | |
parent | bcda285fcc98129ce48c4eda72a7f3595c7685ec (diff) |
Add support for the form of the SSE41 extractps instruction that
puts its result in a 32-bit GPR.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49762 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 5 | ||||
-rw-r--r-- | test/CodeGen/X86/sse41-extractps-bitcast-0.ll | 12 | ||||
-rw-r--r-- | test/CodeGen/X86/sse41-extractps-bitcast-1.ll | 19 |
4 files changed, 37 insertions, 5 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d8eaee71c8..69acf1a018 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3833,11 +3833,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op,
   } else if (VT == MVT::f32) {
     // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
     // the result back to FR32 register. It's only worth matching if the
-    // result has a single use which is a store.
+    // result has a single use which is a store or a bitcast to i32.
     if (!Op.hasOneUse())
       return SDOperand();
     SDNode *User = Op.Val->use_begin()->getUser();
-    if (User->getOpcode() != ISD::STORE)
+    if (User->getOpcode() != ISD::STORE &&
+        (User->getOpcode() != ISD::BIT_CONVERT ||
+         User->getValueType(0) != MVT::i32))
       return SDOperand();
     SDOperand Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
                     DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 442847cda8..982b0dc6df 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3387,13 +3387,12 @@ defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
 /// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
 /// destination
 multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
-  // Not worth matching to rr form of extractps since the result is in GPR32.
   def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                  (ins VR128:$src1, i32i8imm:$src2),
                  !strconcat(OpcodeStr,
                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                 [/*(set GR32:$dst,
-                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))*/]>,
+                 [(set GR32:$dst,
+                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
                  OpSize;
   def mr : SS4AIi8<opc, MRMDestMem, (outs),
                  (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
diff --git a/test/CodeGen/X86/sse41-extractps-bitcast-0.ll b/test/CodeGen/X86/sse41-extractps-bitcast-0.ll
new file mode 100644
index 0000000000..bcfaa7a0e5
--- /dev/null
+++ b/test/CodeGen/X86/sse41-extractps-bitcast-0.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep extractps | count 2
+
+define i32 @foo(<4 x float> %v) {
+  %s = extractelement <4 x float> %v, i32 3
+  %i = bitcast float %s to i32
+  ret i32 %i
+}
+define i32 @boo(<4 x float> %v) {
+  %t = bitcast <4 x float> %v to <4 x i32>
+  %s = extractelement <4 x i32> %t, i32 3
+  ret i32 %s
+}
diff --git a/test/CodeGen/X86/sse41-extractps-bitcast-1.ll b/test/CodeGen/X86/sse41-extractps-bitcast-1.ll
new file mode 100644
index 0000000000..fc0df06062
--- /dev/null
+++ b/test/CodeGen/X86/sse41-extractps-bitcast-1.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | not grep extractps
+
+; The non-store form of extractps puts its result into a GPR.
+; This makes it suitable for an extract from a <4 x float> that
+; is bitcasted to i32, but unsuitable for much of anything else.
+
+define float @bar(<4 x float> %v) {
+  %s = extractelement <4 x float> %v, i32 3
+  %t = add float %s, 1.0
+  ret float %t
+}
+define float @baz(<4 x float> %v) {
+  %s = extractelement <4 x float> %v, i32 3
+  ret float %s
+}
+define i32 @qux(<4 x i32> %v) {
+  %i = extractelement <4 x i32> %v, i32 3
+  ret i32 %i
+}
|