diff options
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Target/X86/SSEDomainFix.cpp | 98 | ||||
-rw-r--r-- | lib/Target/X86/X86.h | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86.td | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFormats.td | 64 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 43 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.h | 16 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.cpp | 15 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.h | 1 |
9 files changed, 223 insertions, 21 deletions
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 4d3dedf0e5..22285f1932 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) set(sources + SSEDomainFix.cpp X86AsmBackend.cpp X86CodeEmitter.cpp X86COFFMachineModuleInfo.cpp diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp new file mode 100644 index 0000000000..261b40c5ab --- /dev/null +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -0,0 +1,98 @@ +//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SSEDomainFix pass. +// +// Some SSE instructions like mov, and, or, xor are available in different +// variants for different operand types. These variant instructions are +// equivalent, but on Nehalem and newer cpus there is extra latency +// transferring data between integer and floating point domains. +// +// This pass changes the variant instructions to minimize domain crossings. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sse-domain-fix" +#include "X86InstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class SSEDomainFixPass : public MachineFunctionPass { + static char ID; + const X86InstrInfo *TII; + + MachineFunction *MF; + MachineBasicBlock *MBB; +public: + SSEDomainFixPass() : MachineFunctionPass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "SSE execution domain fixup"; + } + +private: + void enterBasicBlock(MachineBasicBlock *MBB); +}; +} + +char SSEDomainFixPass::ID = 0; + +void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) { + MBB = mbb; + DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n"); +} + +bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo()); + + // If no XMM registers are used in the function, we can skip it completely. + bool XMMIsUsed = false; + for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(), + E = X86::VR128RegClass.end(); I != E; ++I) + if (MF->getRegInfo().isPhysRegUsed(*I)) { + XMMIsUsed = true; + break; + } + if (!XMMIsUsed) return false; + + MachineBasicBlock *Entry = MF->begin(); + SmallPtrSet<MachineBasicBlock*, 16> Visited; + for (df_ext_iterator<MachineBasicBlock*, + SmallPtrSet<MachineBasicBlock*, 16> > + DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); + DFI != DFE; ++DFI) { + enterBasicBlock(*DFI); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + MachineInstr *MI = I; + const unsigned *equiv = 0; + X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv); + DEBUG(dbgs() << "-isd"[domain] << (equiv ? "* " : " ") << *MI); + } + } + return false; +} + +FunctionPass *llvm::createSSEDomainFixPass() { + return new SSEDomainFixPass(); +} diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index c753cf2a53..9be38a4b56 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -41,6 +41,10 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM, /// FunctionPass *createX86FloatingPointStackifierPass(); +/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain +/// crossings. +FunctionPass *createSSEDomainFixPass(); + /// createX87FPRegKillInserterPass - This function returns a pass which /// inserts FP_REG_KILL instructions where needed. /// diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 2be51e1a13..5788e2a71f 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -164,6 +164,7 @@ def X86InstrInfo : InstrInfo { "FPFormBits", "hasLockPrefix", "SegOvrBits", + "DomainBits", "Opcode"]; let TSFlagsShifts = [0, 6, @@ -174,6 +175,7 @@ def X86InstrInfo : InstrInfo { 16, 19, 20, + 22, 24]; } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index c06b81b10a..a811638c39 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -68,6 +68,16 @@ def CompareFP : FPFormat<5>; def CondMovFP : FPFormat<6>; def SpecialFP : FPFormat<7>; +// Class specifying the SSE execution domain, used by the SSEDomainFix pass. +// Instruction execution domain. +class Domain<bits<2> val> { + bits<2> Value = val; +} +def GenericDomain : Domain<0>; +def SSEPackedInt : Domain<1>; +def SSEPackedSingle : Domain<2>; +def SSEPackedDouble : Domain<3>; + // Prefix byte classes which are used to indicate to the ad-hoc machine code // emitter that various prefix bytes are required. class OpSize { bit hasOpSizePrefix = 1; } @@ -93,7 +103,7 @@ class TA { bits<4> Prefix = 14; } class TF { bits<4> Prefix = 15; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, - string AsmStr> + string AsmStr, Domain d = GenericDomain> : Instruction { let Namespace = "X86"; @@ -119,16 +129,19 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bits<3> FPFormBits = 0; bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix? bits<2> SegOvrBits = 0; // Segment override prefix. + Domain Dom = d; + bits<2> DomainBits = Dom.Value; } -class I<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern> - : X86Inst<o, f, NoImm, outs, ins, asm> { +class I<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern, Domain d = GenericDomain> + : X86Inst<o, f, NoImm, outs, ins, asm, d> { let Pattern = pattern; let CodeSize = 3; } class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm8 , outs, ins, asm> { + list<dag> pattern, Domain d = GenericDomain> + : X86Inst<o, f, Imm8, outs, ins, asm, d> { let Pattern = pattern; let CodeSize = 3; } @@ -196,14 +209,16 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>; -class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, +class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>; class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, + Requires<[HasSSE1]>; class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, + Requires<[HasSSE1]>; // SSE2 Instruction Templates: // @@ -222,10 +237,12 @@ class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, + Requires<[HasSSE2]>; class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, + Requires<[HasSSE2]>; // SSE3 Instruction Templates: // @@ -235,12 +252,15 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE3]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS, + Requires<[HasSSE3]>; class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE3]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD, + Requires<[HasSSE3]>; class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, + Requires<[HasSSE3]>; // SSSE3 Instruction Templates: @@ -254,10 +274,12 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + Requires<[HasSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + Requires<[HasSSSE3]>; // SSE4.1 Instruction Templates: // @@ -266,17 +288,20 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, // class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + Requires<[HasSSE41]>; class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + Requires<[HasSSE41]>; // SSE4.2 Instruction Templates: // // SS428I - SSE 4.2 instructions with T8 prefix. class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + Requires<[HasSSE42]>; // SS42FI - SSE 4.2 instructions with TF prefix. class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, @@ -286,7 +311,8 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + Requires<[HasSSE42]>; // X86-64 Instruction templates... // diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 2323f5790a..eeb020ba3b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3658,3 +3658,46 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { X86FI->setGlobalBaseReg(GlobalBaseReg); return GlobalBaseReg; } + +X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI, + const unsigned *&equiv) const { + // These are the replaceable SSE instructions. Some of these have Int variants + // that we don't include here. We don't want to replace instructions selected + // by intrinsics. + static const unsigned ReplaceableInstrs[][3] = { + //PackedInt PackedSingle PackedDouble + { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr }, + { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm }, + { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr }, + { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr }, + { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm }, + { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr }, + { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm }, + { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr }, + { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm }, + { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr }, + { X86::PORrm, X86::ORPSrm, X86::ORPDrm }, + { X86::PORrr, X86::ORPSrr, X86::ORPDrr }, + { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm }, + { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr }, + { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm }, + { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr }, + { X86::PXORrm, X86::XORPSrm, X86::XORPDrm }, + { X86::PXORrr, X86::XORPSrr, X86::XORPDrr }, + }; + + const SSEDomain domain = + SSEDomain((MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3); + if (domain == NotSSEDomain) + return domain; + + // Linear search FTW! + const unsigned opc = MI->getOpcode(); + for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) + if (ReplaceableInstrs[i][domain-1] == opc) { + equiv = ReplaceableInstrs[i]; + return domain; + } + equiv = 0; + return domain; +} diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 5111719a20..965740dcaf 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -398,7 +398,10 @@ namespace X86II { FS = 1 << SegOvrShift, GS = 2 << SegOvrShift, - // Bits 22 -> 23 are unused + // Execution domain for SSE instructions in bits 22, 23. + // 0 in bits 22-23 means normal, non-SSE instruction. See SSEDomain below. + SSEDomainShift = 22, + OpcodeShift = 24, OpcodeMask = 0xFF << OpcodeShift }; @@ -486,7 +489,7 @@ class X86InstrInfo : public TargetInstrInfoImpl { /// MemOp2RegOpTable - Load / store unfolding opcode map. /// DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable; - + public: explicit X86InstrInfo(X86TargetMachine &tm); @@ -716,6 +719,15 @@ public: /// unsigned getGlobalBaseReg(MachineFunction *MF) const; + /// Some SSE instructions come in variants for three domains. + enum SSEDomain { NotSSEDomain, PackedInt, PackedSingle, PackedDouble }; + + /// GetSSEDomain - Return the SSE execution domain of MI, or NotSSEDomain for + /// unknown instructions. If the instruction has equivalents for other + /// domains, equiv points to a list of opcodes for [PackedInt, PackedSingle, + /// PackedDouble]. + SSEDomain GetSSEDomain(const MachineInstr *MI, const unsigned *&equiv) const; + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f13e6f3525..06a481de25 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -17,11 +17,17 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" using namespace llvm; +static cl::opt<bool> +SSEDomainFix("sse-domain-fix", + cl::desc("Enable fixing of SSE execution domain"), + cl::init(false), cl::Hidden); + static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { @@ -169,6 +175,15 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, return true; // -print-machineinstr should print after this. } +bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + if (SSEDomainFix && OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { + PM.add(createSSEDomainFixPass()); + return true; + } + return false; +} + bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 2bb54544d4..ae7b5b29af 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -66,6 +66,7 @@ public: virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); }; |