diff options
author | Andrew Trick <atrick@apple.com> | 2012-06-29 07:10:41 +0000 |
---|---|---|
committer | Andrew Trick <atrick@apple.com> | 2012-06-29 07:10:41 +0000 |
commit | 3e4b3b9043b1ced24e07d8d1174feeee06c6912e (patch) | |
tree | f31c2d3c63fc1da8b3b598c433759ae8391e62ed | |
parent | 94e3b388e561ce980c861e092bf378bf40202268 (diff) |
Revert "Make NumMicroOps a variable in the subtarget's instruction itinerary."
This reverts commit r159406. I noticed a performance regression, so I'll back it out for now.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159411 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/MC/MCInstrItineraries.h | 3 | ||||
-rw-r--r-- | include/llvm/Target/TargetInstrInfo.h | 4 | ||||
-rw-r--r-- | include/llvm/Target/TargetSchedule.td | 19 | ||||
-rw-r--r-- | lib/CodeGen/TargetInstrInfoImpl.cpp | 4 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 18 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 22 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA8.td | 34 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA9.td | 35 | ||||
-rw-r--r-- | utils/TableGen/SubtargetEmitter.cpp | 2 |
9 files changed, 62 insertions, 79 deletions
diff --git a/include/llvm/MC/MCInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h index d8587068ae..1003fb586c 100644 --- a/include/llvm/MC/MCInstrItineraries.h +++ b/include/llvm/MC/MCInstrItineraries.h @@ -95,7 +95,7 @@ struct InstrStage { /// operands are read and written. /// struct InstrItinerary { - int NumMicroOps; ///< # of micro-ops, -1 means it's variable + unsigned NumMicroOps; ///< # of micro-ops, 0 means it's variable unsigned FirstStage; ///< Index of first stage in itinerary unsigned LastStage; ///< Index of last + 1 stage in itinerary unsigned FirstOperandCycle; ///< Index of first operand rd/wr @@ -323,6 +323,7 @@ public: } }; + } // End llvm namespace #endif diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 87f1fe5b0b..4e73139881 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -648,9 +648,7 @@ public: } /// getNumMicroOps - Return the number of u-operations the given machine - /// instruction will be decoded to on the target cpu. The itinerary's - /// IssueWidth is the number of microops that can be dispatched each - /// cycle. An instruction with zero microops takes no dispatch resources. + /// instruction will be decoded to on the target cpu. virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr *MI) const = 0; diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index caa5a84c83..e22e67cdac 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -73,20 +73,20 @@ class InstrStage<int cycles, list<FuncUnit> units, // across all chip sets. Thus a new chip set can be added without modifying // instruction information. // -class InstrItinClass; +// NumMicroOps represents the number of micro-operations that each instruction +// in the class are decoded to. 
If the number is zero, then it means the +// instruction can decode into variable number of micro-ops and it must be +// determined dynamically. +// +class InstrItinClass<int ops = 1> { + int NumMicroOps = ops; +} def NoItinerary : InstrItinClass; //===----------------------------------------------------------------------===// // Instruction itinerary data - These values provide a runtime map of an // instruction itinerary class (name) to its itinerary data. // -// NumMicroOps represents the number of micro-operations that each instruction -// in the class are decoded to. If the number is zero, then it means the -// instruction can decode into variable number of micro-ops and it must be -// determined dynamically. This directly relates to the itineraries -// global IssueWidth property, which constrains the number of microops -// that can issue per cycle. -// // OperandCycles are optional "cycle counts". They specify the cycle after // instruction issue the values which correspond to specific operand indices // are defined or read. Bypasses are optional "pipeline forwarding pathes", if @@ -106,9 +106,8 @@ def NoItinerary : InstrItinClass; // is reduced by 1. 
class InstrItinData<InstrItinClass Class, list<InstrStage> stages, list<int> operandcycles = [], - list<Bypass> bypasses = [], int uops = 1> { + list<Bypass> bypasses = []> { InstrItinClass TheClass = Class; - int NumMicroOps = uops; list<InstrStage> Stages = stages; list<int> OperandCycles = operandcycles; list<Bypass> Bypasses = bypasses; diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 54be88a8bb..7af08f591f 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -560,8 +560,8 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData, return 1; unsigned Class = MI->getDesc().getSchedClass(); - int UOps = ItinData->Itineraries[Class].NumMicroOps; - if (UOps >= 0) + unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; + if (UOps) return UOps; // The # of u-ops is dynamically determined. The specific target should diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index b3fef29093..3a180dfa27 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2176,9 +2176,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); - int ItinUOps = ItinData->Itineraries[Class].NumMicroOps; - if (ItinUOps >= 0) - return ItinUOps; + unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; + if (UOps) + return UOps; unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2252,19 +2252,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, return 2; // 4 registers would be issued: 2, 2. // 5 registers would be issued: 2, 2, 1. 
- int A8UOps = (NumRegs / 2); + UOps = (NumRegs / 2); if (NumRegs % 2) - ++A8UOps; - return A8UOps; + ++UOps; + return UOps; } else if (Subtarget.isCortexA9()) { - int A9UOps = (NumRegs / 2); + UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. if ((NumRegs % 2) || !MI->hasOneMemOperand() || (*MI->memoperands_begin())->getAlignment() < 8) - ++A9UOps; - return A9UOps; + ++UOps; + return UOps; } else { // Assume the worst. return NumRegs; diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 81d2fa37c2..b9a07f1ee6 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -70,11 +70,11 @@ def IIC_iLoad_bh_siu : InstrItinClass; def IIC_iLoad_d_i : InstrItinClass; def IIC_iLoad_d_r : InstrItinClass; def IIC_iLoad_d_ru : InstrItinClass; -def IIC_iLoad_m : InstrItinClass; -def IIC_iLoad_mu : InstrItinClass; -def IIC_iLoad_mBr : InstrItinClass; -def IIC_iPop : InstrItinClass; -def IIC_iPop_Br : InstrItinClass; +def IIC_iLoad_m : InstrItinClass<0>; // micro-coded +def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded +def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded +def IIC_iPop : InstrItinClass<0>; // micro-coded +def IIC_iPop_Br : InstrItinClass<0>; // micro-coded def IIC_iLoadiALU : InstrItinClass; def IIC_iStore_i : InstrItinClass; def IIC_iStore_r : InstrItinClass; @@ -91,8 +91,8 @@ def IIC_iStore_bh_siu : InstrItinClass; def IIC_iStore_d_i : InstrItinClass; def IIC_iStore_d_r : InstrItinClass; def IIC_iStore_d_ru : InstrItinClass; -def IIC_iStore_m : InstrItinClass; -def IIC_iStore_mu : InstrItinClass; +def IIC_iStore_m : InstrItinClass<0>; // micro-coded +def IIC_iStore_mu : InstrItinClass<0>; // micro-coded def IIC_Preload : InstrItinClass; def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; @@ -126,12 +126,12 @@ def IIC_fpSQRT32 : InstrItinClass; def IIC_fpSQRT64 : InstrItinClass; def IIC_fpLoad32 
: InstrItinClass; def IIC_fpLoad64 : InstrItinClass; -def IIC_fpLoad_m : InstrItinClass; -def IIC_fpLoad_mu : InstrItinClass; +def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded +def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded def IIC_fpStore32 : InstrItinClass; def IIC_fpStore64 : InstrItinClass; -def IIC_fpStore_m : InstrItinClass; -def IIC_fpStore_mu : InstrItinClass; +def IIC_fpStore_m : InstrItinClass<0>; // micro-coded +def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded def IIC_VLD1 : InstrItinClass; def IIC_VLD1x2 : InstrItinClass; def IIC_VLD1x3 : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 61de00a208..eb1083ca23 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -155,30 +155,28 @@ def CortexA8Itineraries : MultiIssueItineraries< // Load multiple, def is the 5th operand. Pipeline 0 only. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>], - [1, 1, 1, 1, 3], [], -1>, // dynamic uops + InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>, // // Load multiple + update, defs are the 1st and 5th operands. InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>, - InstrStage<3, [A8_LSPipe]>], - [2, 1, 1, 1, 3], [], -1>, // dynamic uops + InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>, // // Load multiple plus branch InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>, InstrStage<3, [A8_LSPipe]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], - [1, 2, 1, 1, 3], [], -1>, // dynamic uops + [1, 2, 1, 1, 3]>, // // Pop, def is the 3rd operand. InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>, - InstrStage<3, [A8_LSPipe]>], - [1, 1, 3], [], -1>, // dynamic uops + InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>, // // Push, def is the 3th operand. 
InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>, InstrStage<3, [A8_LSPipe]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], - [1, 1, 3], [], -1>, // dynamic uops + [1, 1, 3]>, + // // iLoadi + iALUr for t2LDRpci_pic. InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, @@ -233,13 +231,12 @@ def CortexA8Itineraries : MultiIssueItineraries< // Store multiple. Pipeline 0 only. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>], - [], [], -1>, // dynamic uops + InstrStage<2, [A8_LSPipe]>]>, // // Store multiple + update InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>], - [2], [], -1>, // dynamic uops + InstrStage<2, [A8_LSPipe]>], [2]>, + // // Preload InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, @@ -400,16 +397,14 @@ def CortexA8Itineraries : MultiIssueItineraries< InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], - [1, 1, 1, 2], [], -1>, // dynamic uops + InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>, // // FP Load Multiple + update InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], - [2, 1, 1, 1, 2], [], -1>, // dynamic uops + InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>, // // Single-precision FP Store InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, @@ -428,16 +423,15 @@ def CortexA8Itineraries : MultiIssueItineraries< InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], - [1, 1, 1, 1], [], -1>, // dynamic uops + InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>, // // FP Store Multiple + update InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, 
[A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], - [2, 1, 1, 1, 1], [], -1>, // dynamic uops + InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>, + // NEON // Issue through integer pipeline, and execute in NEON unit. // diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 1677ba6a98..a00577bf3d 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -284,8 +284,7 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], - -1>, // dynamic uops + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, // // Load multiple + update, defs are the 1st and 5th operands. InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -293,8 +292,7 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], - -1>, // dynamic uops + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, // // Load multiple plus branch InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -303,8 +301,7 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_LSUnit]>, InstrStage<1, [A9_Branch]>], [1, 2, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], - -1>, // dynamic uops + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, // // Pop, def is the 3rd operand. InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -312,8 +309,7 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [1, 1, 3], - [NoBypass, NoBypass, A9_LdBypass], - -1>, // dynamic uops + [NoBypass, NoBypass, A9_LdBypass]>, // // Pop + branch, def is the 3rd operand. 
InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -322,8 +318,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_LSUnit]>, InstrStage<1, [A9_Branch]>], [1, 1, 3], - [NoBypass, NoBypass, A9_LdBypass], - -1>, // dynamic uops + [NoBypass, NoBypass, A9_LdBypass]>, + // // iLoadi + iALUr for t2LDRpci_pic. InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -417,15 +413,14 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU], 0>, - InstrStage<2, [A9_LSUnit]>], - [], [], -1>, // dynamic uops + InstrStage<2, [A9_LSUnit]>]>, // // Store multiple + update InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU], 0>, - InstrStage<2, [A9_LSUnit]>], - [2], [], -1>, // dynamic uops + InstrStage<2, [A9_LSUnit]>], [2]>, + // // Preload InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>, @@ -722,8 +717,7 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [1, 1, 1, 1], [], -1>, // dynamic uops + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, // // FP Load Multiple + update // FIXME: assumes 2 doubles which requires 2 LS cycles. 
@@ -732,8 +726,7 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 1, 1, 1], [], -1>, // dynamic uops + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, // // Single-precision FP Store InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -760,8 +753,7 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [1, 1, 1, 1], [], -1>, // dynamic uops + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, // // FP Store Multiple + update // FIXME: assumes 2 doubles which requires 2 LS cycles. @@ -770,8 +762,7 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 1, 1, 1], [], -1>, // dynamic uops + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, // NEON // VLD1 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index adfd416fc0..59464d2940 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -498,7 +498,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS, unsigned Find = ItinClassesMap[Name]; // Set up itinerary as location and location + stage count - unsigned NumUOps = ItinData->getValueAsInt("NumMicroOps"); + unsigned NumUOps = ItinClassList[Find]->getValueAsInt("NumMicroOps"); InstrItinerary Intinerary = { NumUOps, FindStage, FindStage + NStages, FindOperandCycle, FindOperandCycle + NOperandCycles}; |