author    Jakob Stoklund Olesen <stoklund@2pi.dk>    2010-03-31 00:40:13 +0000
committer Jakob Stoklund Olesen <stoklund@2pi.dk>    2010-03-31 00:40:13 +0000
commit    d363b4ebc7a98483437a5a88aba01dd9facdcd01 (patch)
tree      29e72babc2efbca3db84b62823b29327b6420747
parent    b16df90b48474ad8e426b80c9348d508bd576ed6 (diff)
Replace V_SET0 with variants for each SSE execution domain.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99975 91177308-0d34-0410-b5e6-96231b3b80d8
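
Background: xorps, xorpd and pxor all clear an XMM register, but they execute in the packed-single, packed-double and packed-integer domains respectively, and on Nehalem-class CPUs forwarding a result between domains costs an extra bypass cycle. Splitting V_SET0 into one pseudo per domain lets the SSEDomainFix pass pick the zeroing idiom that matches the instructions reading the register. The pass derives the domain from the instruction's TSFlags, roughly like this (a sketch, assuming the X86II::SSEDomainShift encoding used by this era of the tree):

    // Extract the SSE execution domain from the target-specific flags:
    // 0 = generic, then SSEPackedSingle/SSEPackedDouble/SSEPackedInt.
    uint64_t TSFlags = MI->getDesc().TSFlags;
    unsigned Domain = (TSFlags >> X86II::SSEDomainShift) & 3;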
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp   4
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp               11
-rw-r--r--  lib/Target/X86/X86InstrSSE.td                 26
-rw-r--r--  test/CodeGen/X86/vec_return.ll                 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-7.ll              2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-9.ll              2
-rw-r--r--  test/CodeGen/X86/vec_zero.ll                   4
-rw-r--r--  test/CodeGen/X86/vec_zero_cse.ll               3
-rw-r--r--  test/CodeGen/X86/xor.ll                        2
9 files changed, 34 insertions, 22 deletions
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 88370e630b..c851ca3fc8 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -287,7 +287,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SET0: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+ case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
case X86::MOV16r0:
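
Each new case lowers its pseudo to a self-XOR of the destination register via LowerUnaryToTwoAddr, which swaps in the real opcode and duplicates the lone def operand into both source positions. A sketch of its shape, paraphrased from this file rather than part of the patch:

    static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
      OutMI.setOpcode(NewOpc);                // XORPSrr, XORPDrr or PXORrr
      OutMI.addOperand(OutMI.getOperand(0));  // dst doubles as source 1 ...
      OutMI.addOperand(OutMI.getOperand(0));  // ... and source 2: xorps %xmm0,%xmm0
    }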
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index d9b0960960..a6d9c045aa 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2514,7 +2514,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
Alignment = 16;
break;
@@ -2544,11 +2546,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
SmallVector<MachineOperand,X86AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
- // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+ // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
// Medium and large mode can't fold loads this way.
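
Under register pressure, foldMemoryOperandImpl turns the zeroing pseudo into a load from a constant-pool entry instead of occupying a register. A minimal sketch of the constant-pool side, simplified from the body of this function (the per-opcode choice of element type is omitted):

    // Create a 16-byte-aligned all-zeros constant the folded load can
    // reference; V_SETALLONES would use Constant::getAllOnesValue instead.
    MachineConstantPool &MCP = *MF.getConstantPool();
    const Type *Ty = VectorType::get(
        Type::getInt32Ty(MF.getFunction()->getContext()), 4);
    Constant *C = Constant::getNullValue(Ty);
    unsigned CPI = MCP.getConstantPoolIndex(C, /*Alignment=*/16);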
@@ -3675,6 +3679,7 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
{ X86::ORPSrm, X86::ORPDrm, X86::PORrm },
{ X86::ORPSrr, X86::ORPDrr, X86::PORrr },
+ { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
};
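
Each row of ReplaceableInstrs lists the equivalent packed-single, packed-double and packed-integer forms of one operation, so this row makes the three zeroing pseudos freely interchangeable to the domain-fixing pass. A hypothetical helper (lookupDomainVariant is not part of the patch) showing how such a table is meant to be consulted:

    // Return Opcode's variant in the requested domain
    // (0 = PS, 1 = PD, 2 = PI), or 0 if the opcode is not replaceable.
    static unsigned lookupDomainVariant(unsigned Opcode, unsigned Domain) {
      for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
        for (unsigned j = 0; j != 3; ++j)
          if (ReplaceableInstrs[i][j] == Opcode)
            return ReplaceableInstrs[i][Domain];
      return 0;
    }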
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index f74ca9dd7b..5a87314c32 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1115,15 +1115,19 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
// load of an all-zeros value if folding it would be beneficial.
// FIXME: Change encoding to pseudo!
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in
-def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
-def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
-def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
-def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
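
Encoding note: V_SET0PS and V_SET0PD keep the xorps/xorpd opcode byte (0x57; the PDI class supplies the 0x66 operand-size prefix), while V_SET0PI uses pxor's 0xEF. V_SET0PI is declared as a PDI but overrides the class's default SSEPackedDouble domain with ExeDomain = SSEPackedInt, so the domain pass treats it as an integer-domain instruction.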
@@ -3026,14 +3030,14 @@ let Predicates = [HasSSE2] in {
let AddedComplexity = 15 in {
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)),
+ (MOVSSrr (v4f32 (V_SET0PS)),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)),
+ (MOVSSrr (v4i32 (V_SET0PI)),
(EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
}
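
These AddedComplexity patterns materialize a zeroed register and then MOVSS/MOVSD the scalar into the low element; the only change is that the zero now comes from a domain-named pseudo: V_SET0PS in the floating-point patterns, V_SET0PI in the v4i32 one.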
diff --git a/test/CodeGen/X86/vec_return.ll b/test/CodeGen/X86/vec_return.ll
index 66762b4a06..676be9b717 100644
--- a/test/CodeGen/X86/vec_return.ll
+++ b/test/CodeGen/X86/vec_return.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep xorps %t | count 1
+; RUN: grep pxor %t | count 1
; RUN: grep movaps %t | count 1
; RUN: not grep shuf %t
diff --git a/test/CodeGen/X86/vec_shuffle-7.ll b/test/CodeGen/X86/vec_shuffle-7.ll
index 4cdca09c72..64bd6a3c83 100644
--- a/test/CodeGen/X86/vec_shuffle-7.ll
+++ b/test/CodeGen/X86/vec_shuffle-7.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep xorps %t | count 1
+; RUN: grep pxor %t | count 1
; RUN: not grep shufps %t
define void @test() {
diff --git a/test/CodeGen/X86/vec_shuffle-9.ll b/test/CodeGen/X86/vec_shuffle-9.ll
index fc16a26b61..07195869b8 100644
--- a/test/CodeGen/X86/vec_shuffle-9.ll
+++ b/test/CodeGen/X86/vec_shuffle-9.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
define <4 x i32> @test(i8** %ptr) {
-; CHECK: xorps
+; CHECK: pxor
; CHECK: punpcklbw
; CHECK: punpcklwd
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index ae5af586cd..4d1f05629b 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xorps | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; CHECK: xorps
define void @foo(<4 x float>* %P) {
%T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
%S = fadd <4 x float> zeroinitializer, %T ; <<4 x float>> [#uses=1]
@@ -7,6 +8,7 @@ define void @foo(<4 x float>* %P) {
ret void
}
+; CHECK: pxor
define void @bar(<4 x i32>* %P) {
%T = load <4 x i32>* %P ; <<4 x i32>> [#uses=1]
%S = add <4 x i32> zeroinitializer, %T ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 296378c6e9..3b15d4cc40 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 2
; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
@M1 = external global <1 x i64>
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index 9bfff8a06a..f270d9d56e 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -7,7 +7,7 @@ define <4 x i32> @test1() nounwind {
ret <4 x i32> %tmp
; X32: test1:
-; X32: xorps %xmm0, %xmm0
+; X32: pxor %xmm0, %xmm0
; X32: ret
}