diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/ExactHazardRecognizer.cpp | 54 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 54 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb.td | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 46 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleV7.td | 32 |
5 files changed, 92 insertions, 96 deletions
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index 48043f286c..8bac08a4a0 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -34,12 +34,12 @@ ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData // If the begin stage of an itinerary has 0 cycles and units, // then we have reached the end of the itineraries. const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); - if ((IS->Cycles == 0) && (IS->Units == 0)) + if ((IS->getCycles() == 0) && (IS->getUnits() == 0)) break; unsigned ItinDepth = 0; for (; IS != E; ++IS) - ItinDepth += std::max(1U, IS->Cycles); + ItinDepth += IS->getCycles(); ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); } @@ -89,27 +89,25 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU unsigned idx = SU->getInstr()->getDesc().getSchedClass(); for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); IS != E; ++IS) { - // If the stages cycles are 0, then we must have the FU free in - // the current cycle, but we don't advance the cycle time . - unsigned StageCycles = std::max(1U, IS->Cycles); - // We must find one of the stage's units free for every cycle the - // stage is occupied. - for (unsigned int i = 0; i < StageCycles; ++i) { - assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle); - unsigned freeUnits = IS->Units & ~Scoreboard[index]; + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; if (!freeUnits) { - DEBUG(errs() << "*** Hazard in cycle " << cycle << ", "); + DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", "); DEBUG(errs() << "SU(" << SU->NodeNum << "): "); DEBUG(SU->getInstr()->dump()); return Hazard; } - - if (IS->Cycles > 0) - ++cycle; } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); } return NoHazard; @@ -123,17 +121,15 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { unsigned idx = SU->getInstr()->getDesc().getSchedClass(); for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); IS != E; ++IS) { - // If the stages cycles are 0, then we must reserve the FU in the - // current cycle, but we don't advance the cycle time . - unsigned StageCycles = std::max(1U, IS->Cycles); - // We must reserve one of the stage's units for every cycle the - // stage is occupied. - for (unsigned int i = 0; i < StageCycles; ++i) { - assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle); - unsigned freeUnits = IS->Units & ~Scoreboard[index]; + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; // reduce to a single unit unsigned freeUnit = 0; @@ -144,10 +140,10 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { assert(freeUnit && "No function unit available!"); Scoreboard[index] |= freeUnit; - - if (IS->Cycles > 0) - ++cycle; } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); } DEBUG(dumpScoreboard()); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 0985265e19..8e5a01d4dd 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -615,7 +615,7 @@ let isReturn = 1, isTerminator = 1, mayLoad = 1 in []>; // On non-Darwin platforms R9 is callee-saved. -let isCall = 1, Itinerary = IIC_Br, +let isCall = 1, Defs = [R0, R1, R2, R3, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D16, D17, D18, D19, D20, D21, D22, D23, @@ -652,7 +652,7 @@ let isCall = 1, Itinerary = IIC_Br, } // On Darwin R9 is call-clobbered. -let isCall = 1, Itinerary = IIC_Br, +let isCall = 1, Defs = [R0, R1, R2, R3, R9, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D16, D17, D18, D19, D20, D21, D22, D23, @@ -685,7 +685,7 @@ let isCall = 1, Itinerary = IIC_Br, } } -let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in { +let isBranch = 1, isTerminator = 1 in { // B is "predicable" since it can be xformed into a Bcc. let isBarrier = 1 in { let isPredicable = 1 in @@ -1057,7 +1057,7 @@ defm BIC : AsI1_bin_irs<0b1110, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), - AddrMode1, Size4Bytes, IndexModeNone, DPFrm, NoItinerary, + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALU, "bfc", " $dst, $imm", "$src = $dst", [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>, Requires<[IsARM, HasV6T2]> { @@ -1084,16 +1084,16 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm), // let isCommutable = 1 in -def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, +def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, "mul", " $dst, $a, $b", [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "mla", " $dst, $a, $b, $c", + IIC_iMPY, "mla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "mls", " $dst, $a, $b, $c", + IIC_iMPY, "mls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>, Requires<[IsARM, HasV6T2]>; @@ -1101,32 +1101,32 @@ def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), let neverHasSideEffects = 1 in { let isCommutable = 1 in { def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "smull", " $ldst, $hdst, $a, $b", []>; def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "umull", " $ldst, $hdst, $a, $b", []>; } // Multiply + accumulate def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "smlal", " $ldst, $hdst, $a, $b", []>; def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "umlal", " $ldst, $hdst, $a, $b", []>; def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "umaal", " $ldst, $hdst, $a, $b", []>, Requires<[IsARM, HasV6]>; } // neverHasSideEffects // Most significant word multiply def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, "smmul", " $dst, $a, $b", + IIC_iMPY, "smmul", " $dst, $a, $b", [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0001; @@ -1134,7 +1134,7 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), } def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "smmla", " $dst, $a, $b, $c", + IIC_iMPY, "smmla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0001; @@ -1142,7 +1142,7 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "smmls", " $dst, $a, $b, $c", + IIC_iMPY, "smmls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b1101; @@ -1150,7 +1150,7 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), multiclass AI_smul<string opc, PatFrag opnode> { def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1159,7 +1159,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1168,7 +1168,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1177,7 +1177,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1186,7 +1186,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1195,7 +1195,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1207,7 +1207,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { multiclass AI_smla<string opc, PatFrag opnode> { def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16))))]>, @@ -1217,7 +1217,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { @@ -1226,7 +1226,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1235,7 +1235,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { @@ -1244,7 +1244,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1253,7 +1253,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 0da798e15d..1a82331980 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -508,7 +508,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iALU, // multiply register let isCommutable = 1 in -def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU, +def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMPY, "mul", " $dst, $rhs", [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 0ef9cc03e0..5800a43047 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -808,80 +808,80 @@ def : T2Pat<(t2_so_imm_not:$src), // Multiply Instructions. // let isCommutable = 1 in -def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, "mul", " $dst, $a, $b", [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; -def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "mla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; -def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "mls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>; // Extra precision multiplies with low / high results let neverHasSideEffects = 1 in { let isCommutable = 1 in { -def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "smull", " $ldst, $hdst, $a, $b", []>; -def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "umull", " $ldst, $hdst, $a, $b", []>; } // Multiply + accumulate -def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "smlal", " $ldst, $hdst, $a, $b", []>; -def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "umlal", " $ldst, $hdst, $a, $b", []>; -def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "umaal", " $ldst, $hdst, $a, $b", []>; } // neverHasSideEffects // Most significant word multiply -def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, "smmul", " $dst, $a, $b", [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>; -def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "smmla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>; -def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "smmls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>; multiclass T2I_smul<string opc, PatFrag opnode> { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16)))]>; - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16))))]>; - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16)))]>; - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16))))]>; - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16)))]>; - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16)))]>; @@ -889,33 +889,33 @@ multiclass T2I_smul<string opc, PatFrag opnode> { multiclass T2I_smla<string opc, PatFrag opnode> { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16))))]>; - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16)))))]>; - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16))))]>; - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16)))))]>; - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16))))]>; - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16))))]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index 8a7b42eb72..a789d71147 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -16,34 +16,34 @@ def CortexA8Itineraries : ProcessorItineraries<[ // two fully-pipelined integer ALU pipelines InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, // one fully-pipelined integer Multiply pipeline - // function units are used in alpha order, so use FU_Pipe1 - // for the Multiple pipeline - InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe1]>]>, + // function units are reserved by the scheduler in reverse alpha order, + // so use FU_Pipe0 for the Multiple pipeline + InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>, // loads have an extra cycle of latency, but are fully pipelined - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_iLoad , [InstrStage<0, [FU_Issue]>, + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Issue], 0>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>, // fully-pipelined stores - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_iStore , [InstrStage<0, [FU_Issue]>, + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_iStore , [InstrStage<1, [FU_Issue], 0>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, // no delay slots, so the latency of a branch is unimportant InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - // VFP ALU is not pipelined so stall all issues - // FIXME assume NFP pipeline and 7 cycle non-pipelined latency - InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>, + // NFP ALU is not pipelined so stall all issues + InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0], 0>, + InstrStage<7, [FU_Pipe1], 0>]>, // VFP MPY is not pipelined so stall all issues - // FIXME assume NFP pipeline and 7 cycle non-pipelined latency - InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>, + InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0], 0>, + InstrStage<7, [FU_Pipe1], 0>]>, // loads have an extra cycle of latency, but are fully pipelined - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad , [InstrStage<0, [FU_Issue]>, + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Issue], 0>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>, - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore , [InstrStage<0, [FU_Issue]>, + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Issue], 0>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>]> ]>; |