diff options
author | Evan Cheng <evan.cheng@apple.com> | 2009-09-28 09:14:39 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2009-09-28 09:14:39 +0000 |
commit | 5adb66a646e2ec32265263739f5b01c3f50c176a (patch) | |
tree | 38b5fb6a9d2665466cd3b88031122d9c95de6110 /lib/Target/ARM | |
parent | 705428ae4a7fa6d97a95f7811fa2e36630a43c9c (diff) |
Make ARM and Thumb2 32-bit immediate materialization into a single 32-bit pseudo
instruction. This makes it re-materializable.
Thumb2 will split it back out into two instructions so the IT pass will generate the
right mask. Also, this exposes opportunities to optimize the movw to a 16-bit move.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82982 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.h | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMCodeEmitter.cpp | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrFormats.td | 5 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.cpp | 1 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 17 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 22 | ||||
-rw-r--r-- | lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 9 | ||||
-rw-r--r-- | lib/Target/ARM/README.txt | 8 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb2ITBlockPass.cpp | 37 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb2SizeReduction.cpp | 1 |
11 files changed, 83 insertions, 25 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 52af978695..a228945f65 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -864,7 +864,8 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. -ARMCC::CondCodes llvm::getInstrPredicate(MachineInstr *MI, unsigned &PredReg) { +ARMCC::CondCodes +llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { int PIdx = MI->findFirstPredOperandIdx(); if (PIdx == -1) { PredReg = 0; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 3632450ede..a13155b9fd 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -296,7 +296,7 @@ bool isJumpTableBranchOpcode(int Opc) { /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. -ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg); +ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg); int getMatchingCondBranchOpcode(int Opc); diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 57f7d38ae0..5e0c11e5da 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -596,7 +596,8 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) { unsigned Opcode = MI.getDesc().Opcode; switch (Opcode) { default: - llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");//FIXME: + llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); + // FIXME: Add support for MOVimm32. 
case TargetInstrInfo::INLINEASM: { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 5388197c3b..b3c00287c8 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -984,6 +984,11 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>; +class T2Ix2<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeNone, Size8Bytes, itin, opc, asm, "", pattern>; + + // T2Iidxldst - Thumb2 indexed load / store instructions. class T2Iidxldst<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 2ab355b190..4c92891c82 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -96,7 +96,6 @@ reMaterialize(MachineBasicBlock &MBB, MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); MI->getOperand(0).setReg(DestReg); - MBB.insert(I, MI); } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 4bc62733d6..1dcc4d61a9 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -100,6 +100,7 @@ def HasV5T : Predicate<"Subtarget->hasV5TOps()">; def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; def HasV6 : Predicate<"Subtarget->hasV6Ops()">; def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; +def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; @@ -938,7 +939,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), let Inst{25} = 1; } -let isAsCheapAsAMove = 1, Constraints = "$src = $dst" in +let 
Constraints = "$src = $dst" in def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm), DPFrm, IIC_iMOVi, "movt", " $dst, $imm", @@ -1510,7 +1511,8 @@ let isReMaterializable = 1 in def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo, IIC_iMOVi, "mov", " $dst, $src", - [(set GPR:$dst, so_imm2part:$src)]>; + [(set GPR:$dst, so_imm2part:$src)]>, + Requires<[IsARM, NoV6T2]>; def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS), (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)), @@ -1519,9 +1521,14 @@ def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), (so_imm2part_2 imm:$RHS))>; -def : ARMPat<(i32 imm:$src), - (MOVTi16 (MOVi16 (lo16 imm:$src)), (hi16 imm:$src))>, - Requires<[IsARM, HasV6T2]>; +// 32-bit immediate using movw + movt. +// This is a single pseudo instruction to make it re-materializable. Remove +// when we can do generalized remat. +let isReMaterializable = 1 in +def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi, + "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}", + [(set GPR:$dst, (i32 imm:$src))]>, + Requires<[IsARM, HasV6T2]>; // TODO: add,sub,and, 3-instr forms? diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index d0fe432963..27c4ef0dbb 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -649,7 +649,8 @@ let neverHasSideEffects = 1 in def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, "mov", ".w $dst, $src", []>; -let isReMaterializable = 1, isAsCheapAsAMove = 1 in +// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. 
+let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, "mov", ".w $dst, $src", [(set GPR:$dst, t2_so_imm:$src)]>; @@ -660,10 +661,10 @@ def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, [(set GPR:$dst, imm0_65535:$src)]>; let Constraints = "$src = $dst" in -def t2MOVTi16 : T2sI<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, - "movt", " $dst, $imm", - [(set GPR:$dst, - (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>; +def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, + "movt", " $dst, $imm", + [(set GPR:$dst, + (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>; //===----------------------------------------------------------------------===// // Extend Instructions. @@ -1127,7 +1128,10 @@ def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), (t2LEApcrelJT tjumptable:$dst, imm:$id)>; -// Large immediate handling. - -def : T2Pat<(i32 imm:$src), - (t2MOVTi16 (t2MOVi16 (lo16 imm:$src)), (hi16 imm:$src))>; +// 32-bit immediate using movw + movt. +// This is a single pseudo instruction to make it re-materializable. Remove +// when we can do generalized remat. 
+let isReMaterializable = 1 in +def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, + "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}", + [(set GPR:$dst, (i32 imm:$src))]>; diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index fa6720f2cd..df349e33b4 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -329,7 +329,14 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, break; } case MachineOperand::MO_Immediate: { - O << '#' << MO.getImm(); + int64_t Imm = MO.getImm(); + if (Modifier) { + if (strcmp(Modifier, "lo16") == 0) + Imm = Imm & 0xffffLL; + else if (strcmp(Modifier, "hi16") == 0) + Imm = (Imm & 0xffff0000LL) >> 16; + } + O << '#' << Imm; break; } case MachineOperand::MO_MachineBasicBlock: diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 08435c57eb..8fb1da3008 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -592,3 +592,11 @@ conditional move: it saves an instruction and a register. //===---------------------------------------------------------------------===// + +add/sub/and/or + i32 imm can be simplified by folding part of the immediate +into the operation. + +//===---------------------------------------------------------------------===// + +It might be profitable to cse MOVi16 if there are lots of 32-bit immediates +with the same bottom half. 
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index e74a526afa..a06ee8eca8 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -40,12 +40,11 @@ namespace { char Thumb2ITBlockPass::ID = 0; } -static ARMCC::CondCodes getPredicate(const MachineInstr *MI, - const Thumb2InstrInfo *TII) { +static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){ unsigned Opc = MI->getOpcode(); if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) return ARMCC::AL; - return TII->getPredicate(MI); + return llvm::getInstrPredicate(MI, PredReg); } bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { @@ -54,14 +53,39 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineInstr *MI = &*MBBI; - ARMCC::CondCodes CC = getPredicate(MI, TII); + DebugLoc dl = MI->getDebugLoc(); + unsigned PredReg = 0; + ARMCC::CondCodes CC = getPredicate(MI, PredReg); + + // Splitting t2MOVi32imm into a pair of t2MOVi16 + t2MOVTi16 here. + // The only reason it was a single instruction was so it could be + // re-materialized. We want to split it before this and the thumb2 + // size reduction pass to make sure the IT mask is correct and expose + // width reduction opportunities. It doesn't make sense to do this in a + // separate pass so here it is. + if (MI->getOpcode() == ARM::t2MOVi32imm) { + unsigned DstReg = MI->getOperand(0).getReg(); + bool DstDead = MI->getOperand(0).isDead(); // Is this possible? 
+ unsigned Imm = MI->getOperand(1).getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVi16), DstReg) + .addImm(Lo16).addImm(CC).addReg(PredReg); + BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVTi16)) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)) + .addReg(DstReg).addImm(Hi16).addImm(CC).addReg(PredReg); + --MBBI; + --MBBI; + MI->eraseFromParent(); + continue; + } + if (CC == ARMCC::AL) { ++MBBI; continue; } // Insert an IT instruction. - DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT)) .addImm(CC); ++MBBI; @@ -70,7 +94,8 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); unsigned Mask = 0, Pos = 3; while (MBBI != E && Pos) { - ARMCC::CondCodes NCC = getPredicate(&*MBBI, TII); + unsigned Dummy = 0; + ARMCC::CondCodes NCC = getPredicate(&*MBBI, Dummy); if (NCC == OCC) { Mask |= (1 << Pos); } else if (NCC != CC) diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index e1fb3585f7..b8879d2ed1 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -79,6 +79,7 @@ namespace { { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 }, { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 }, { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, + { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, // FIXME: Do we need the 16-bit 'S' variant? { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 }, |