about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Dan Gohman <gohman@apple.com> 2008-04-16 02:32:24 +0000
committer Dan Gohman <gohman@apple.com> 2008-04-16 02:32:24 +0000
commit 171c11ec93d74c71f7e4f8bfb9c9cd5971214b53 (patch)
tree 65821acd32435f1829eae55dce15522acd044695
parent bcda285fcc98129ce48c4eda72a7f3595c7685ec (diff)
Add support for the form of the SSE41 extractps instruction that
puts its result in a 32-bit GPR.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49762 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 6
-rw-r--r-- lib/Target/X86/X86InstrSSE.td 5
-rw-r--r-- test/CodeGen/X86/sse41-extractps-bitcast-0.ll 12
-rw-r--r-- test/CodeGen/X86/sse41-extractps-bitcast-1.ll 19
4 files changed, 37 insertions, 5 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d8eaee71c8..69acf1a018 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3833,11 +3833,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op,
} else if (VT == MVT::f32) {
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
// the result back to FR32 register. It's only worth matching if the
- // result has a single use which is a store.
+ // result has a single use which is a store or a bitcast to i32.
if (!Op.hasOneUse())
return SDOperand();
SDNode *User = Op.Val->use_begin()->getUser();
- if (User->getOpcode() != ISD::STORE)
+ if (User->getOpcode() != ISD::STORE &&
+ (User->getOpcode() != ISD::BIT_CONVERT ||
+ User->getValueType(0) != MVT::i32))
return SDOperand();
SDOperand Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 442847cda8..982b0dc6df 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3387,13 +3387,12 @@ defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
- // Not worth matching to rr form of extractps since the result is in GPR32.
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [/*(set GR32:$dst,
- (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))*/]>,
+ [(set GR32:$dst,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
OpSize;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
diff --git a/test/CodeGen/X86/sse41-extractps-bitcast-0.ll b/test/CodeGen/X86/sse41-extractps-bitcast-0.ll
new file mode 100644
index 0000000000..bcfaa7a0e5
--- /dev/null
+++ b/test/CodeGen/X86/sse41-extractps-bitcast-0.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep extractps | count 2
+
+define i32 @foo(<4 x float> %v) {
+ %s = extractelement <4 x float> %v, i32 3
+ %i = bitcast float %s to i32
+ ret i32 %i
+}
+define i32 @boo(<4 x float> %v) {
+ %t = bitcast <4 x float> %v to <4 x i32>
+ %s = extractelement <4 x i32> %t, i32 3
+ ret i32 %s
+}
diff --git a/test/CodeGen/X86/sse41-extractps-bitcast-1.ll b/test/CodeGen/X86/sse41-extractps-bitcast-1.ll
new file mode 100644
index 0000000000..fc0df06062
--- /dev/null
+++ b/test/CodeGen/X86/sse41-extractps-bitcast-1.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | not grep extractps
+
+; The non-store form of extractps puts its result into a GPR.
+; This makes it suitable for an extract from a <4 x float> that
+; is bitcasted to i32, but unsuitable for much of anything else.
+
+define float @bar(<4 x float> %v) {
+ %s = extractelement <4 x float> %v, i32 3
+ %t = add float %s, 1.0
+ ret float %t
+}
+define float @baz(<4 x float> %v) {
+ %s = extractelement <4 x float> %v, i32 3
+ ret float %s
+}
+define i32 @qux(<4 x i32> %v) {
+ %i = extractelement <4 x i32> %v, i32 3
+ ret i32 %i
+}