aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/X86
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--lib/Target/X86/CMakeLists.txt1
-rw-r--r--lib/Target/X86/SSEDomainFix.cpp98
-rw-r--r--lib/Target/X86/X86.h4
-rw-r--r--lib/Target/X86/X86.td2
-rw-r--r--lib/Target/X86/X86InstrFormats.td64
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp43
-rw-r--r--lib/Target/X86/X86InstrInfo.h16
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp15
-rw-r--r--lib/Target/X86/X86TargetMachine.h1
9 files changed, 223 insertions, 21 deletions
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 4d3dedf0e5..22285f1932 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv)
tablegen(X86GenSubtarget.inc -gen-subtarget)
set(sources
+ SSEDomainFix.cpp
X86AsmBackend.cpp
X86CodeEmitter.cpp
X86COFFMachineModuleInfo.cpp
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
new file mode 100644
index 0000000000..261b40c5ab
--- /dev/null
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -0,0 +1,98 @@
+//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SSEDomainFix pass.
+//
+// Some SSE instructions like mov, and, or, xor are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer cpus there is extra latency
+// transferring data between integer and floating point domains.
+//
+// This pass changes the variant instructions to minimize domain crossings.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sse-domain-fix"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class SSEDomainFixPass : public MachineFunctionPass {
+ static char ID;
+ const X86InstrInfo *TII;
+
+ MachineFunction *MF;
+ MachineBasicBlock *MBB;
+public:
+ SSEDomainFixPass() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "SSE execution domain fixup";
+ }
+
+private:
+ void enterBasicBlock(MachineBasicBlock *MBB);
+};
+}
+
+char SSEDomainFixPass::ID = 0;
+
+void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) {
+ MBB = mbb;
+ DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n");
+}
+
+bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+
+ // If no XMM registers are used in the function, we can skip it completely.
+ bool XMMIsUsed = false;
+ for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
+ E = X86::VR128RegClass.end(); I != E; ++I)
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ XMMIsUsed = true;
+ break;
+ }
+ if (!XMMIsUsed) return false;
+
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*, 16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*,
+ SmallPtrSet<MachineBasicBlock*, 16> >
+ DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
+ DFI != DFE; ++DFI) {
+ enterBasicBlock(*DFI);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ MachineInstr *MI = I;
+ const unsigned *equiv = 0;
+ X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv);
+ DEBUG(dbgs() << "-isd"[domain] << (equiv ? "* " : " ") << *MI);
+ }
+ }
+ return false;
+}
+
+FunctionPass *llvm::createSSEDomainFixPass() {
+ return new SSEDomainFixPass();
+}
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index c753cf2a53..9be38a4b56 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -41,6 +41,10 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM,
///
FunctionPass *createX86FloatingPointStackifierPass();
+/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
+/// crossings.
+FunctionPass *createSSEDomainFixPass();
+
/// createX87FPRegKillInserterPass - This function returns a pass which
/// inserts FP_REG_KILL instructions where needed.
///
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 2be51e1a13..5788e2a71f 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -164,6 +164,7 @@ def X86InstrInfo : InstrInfo {
"FPFormBits",
"hasLockPrefix",
"SegOvrBits",
+ "DomainBits",
"Opcode"];
let TSFlagsShifts = [0,
6,
@@ -174,6 +175,7 @@ def X86InstrInfo : InstrInfo {
16,
19,
20,
+ 22,
24];
}
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index c06b81b10a..a811638c39 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -68,6 +68,16 @@ def CompareFP : FPFormat<5>;
def CondMovFP : FPFormat<6>;
def SpecialFP : FPFormat<7>;
+// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
+// Instruction execution domain.
+class Domain<bits<2> val> {
+ bits<2> Value = val;
+}
+def GenericDomain : Domain<0>;
+def SSEPackedInt : Domain<1>;
+def SSEPackedSingle : Domain<2>;
+def SSEPackedDouble : Domain<3>;
+
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize { bit hasOpSizePrefix = 1; }
@@ -93,7 +103,7 @@ class TA { bits<4> Prefix = 14; }
class TF { bits<4> Prefix = 15; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
- string AsmStr>
+ string AsmStr, Domain d = GenericDomain>
: Instruction {
let Namespace = "X86";
@@ -119,16 +129,19 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bits<3> FPFormBits = 0;
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
+ Domain Dom = d;
+ bits<2> DomainBits = Dom.Value;
}
-class I<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
- : X86Inst<o, f, NoImm, outs, ins, asm> {
+class I<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm8 , outs, ins, asm> {
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
@@ -196,14 +209,16 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
-class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
// SSE2 Instruction Templates:
//
@@ -222,10 +237,12 @@ class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
//
@@ -235,12 +252,15 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ Requires<[HasSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ Requires<[HasSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE3]>;
// SSSE3 Instruction Templates:
@@ -254,10 +274,12 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSSE3]>;
// SSE4.1 Instruction Templates:
//
@@ -266,17 +288,20 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE42]>;
// SS42FI - SSE 4.2 instructions with TF prefix.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -286,7 +311,8 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
// SS42AI = SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE42]>;
// X86-64 Instruction templates...
//
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2323f5790a..eeb020ba3b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3658,3 +3658,46 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
X86FI->setGlobalBaseReg(GlobalBaseReg);
return GlobalBaseReg;
}
+
+X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI,
+ const unsigned *&equiv) const {
+ // These are the replaceable SSE instructions. Some of these have Int variants
+ // that we don't include here. We don't want to replace instructions selected
+ // by intrinsics.
+ static const unsigned ReplaceableInstrs[][3] = {
+ //PackedInt PackedSingle PackedDouble
+ { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr },
+ { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm },
+ { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr },
+ { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr },
+ { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm },
+ { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr },
+ { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm },
+ { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr },
+ { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm },
+ { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr },
+ { X86::PORrm, X86::ORPSrm, X86::ORPDrm },
+ { X86::PORrr, X86::ORPSrr, X86::ORPDrr },
+ { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm },
+ { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr },
+ { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm },
+ { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr },
+ { X86::PXORrm, X86::XORPSrm, X86::XORPDrm },
+ { X86::PXORrr, X86::XORPSrr, X86::XORPDrr },
+ };
+
+ const SSEDomain domain =
+ SSEDomain((MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3);
+ if (domain == NotSSEDomain)
+ return domain;
+
+ // Linear search FTW!
+ const unsigned opc = MI->getOpcode();
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+ if (ReplaceableInstrs[i][domain-1] == opc) {
+ equiv = ReplaceableInstrs[i];
+ return domain;
+ }
+ equiv = 0;
+ return domain;
+}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 5111719a20..965740dcaf 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -398,7 +398,10 @@ namespace X86II {
FS = 1 << SegOvrShift,
GS = 2 << SegOvrShift,
- // Bits 22 -> 23 are unused
+ // Execution domain for SSE instructions in bits 22, 23.
+ // 0 in bits 22-23 means normal, non-SSE instruction. See SSEDomain below.
+ SSEDomainShift = 22,
+
OpcodeShift = 24,
OpcodeMask = 0xFF << OpcodeShift
};
@@ -486,7 +489,7 @@ class X86InstrInfo : public TargetInstrInfoImpl {
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
-
+
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -716,6 +719,15 @@ public:
///
unsigned getGlobalBaseReg(MachineFunction *MF) const;
+ /// Some SSE instructions come in variants for three domains.
+ enum SSEDomain { NotSSEDomain, PackedInt, PackedSingle, PackedDouble };
+
+ /// GetSSEDomain - Return the SSE execution domain of MI, or NotSSEDomain for
+ /// unknown instructions. If the instruction has equivalents for other
+ /// domains, equiv points to a list of opcodes for [PackedInt, PackedSingle,
+ /// PackedDouble].
+ SSEDomain GetSSEDomain(const MachineInstr *MI, const unsigned *&equiv) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f13e6f3525..06a481de25 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -17,11 +17,17 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
+static cl::opt<bool>
+SSEDomainFix("sse-domain-fix",
+ cl::desc("Enable fixing of SSE execution domain"),
+ cl::init(false), cl::Hidden);
+
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
@@ -169,6 +175,15 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
return true; // -print-machineinstr should print after this.
}
+bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (SSEDomainFix && OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
+ PM.add(createSSEDomainFixPass());
+ return true;
+ }
+ return false;
+}
+
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 2bb54544d4..ae7b5b29af 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -66,6 +66,7 @@ public:
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE);
};