aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManman Ren <mren@apple.com>2012-06-01 19:33:18 +0000
committerManman Ren <mren@apple.com>2012-06-01 19:33:18 +0000
commit68f25571e759c1fcf2da206109647259f49f7416 (patch)
tree861c26c6230399c7a15e25af8e868f16aafb9ad2
parenta0c5e6ceb53c0f77906d813a354b19e72d26950c (diff)
ARM: properly handle alignment for struct byval.
Factor out the expansion code into a function. This change is to be enabled in clang. rdar://9877866 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157830 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/CodeGen/FastISel.h9
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp24
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp514
-rw-r--r--lib/Target/ARM/ARMISelLowering.h3
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td8
5 files changed, 308 insertions, 250 deletions
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index e57c8b18c6..9fe743eddb 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -299,6 +299,15 @@ protected:
unsigned Op1, bool Op1IsKill,
uint64_t Imm);
+ /// FastEmitInst_rrii - Emit a MachineInstr with two register operands,
+ /// two immediates operands, and a result register in the given register
+ /// class.
+ unsigned FastEmitInst_rrii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm1, uint64_t Imm2);
+
/// FastEmitInst_i - Emit a MachineInstr with a single immediate
/// operand, and a result register in the given register class.
unsigned FastEmitInst_i(unsigned MachineInstrOpcode,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 07687ef8b5..6464cf4ecc 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1306,6 +1306,30 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
return ResultReg;
}
+unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 4528701576..8c71021c6c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1434,9 +1434,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
MVT::i32);
+ SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue Ops[] = { Chain, Dst, Src, SizeNode};
+ SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
Ops, array_lengthof(Ops)));
}
@@ -6239,6 +6240,270 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
llvm_unreachable("Expecting a BB with two successors!");
}
+MachineBasicBlock *ARMTargetLowering::
+EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
+ // This pseudo instruction has 3 operands: dst, src, size
+ // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
+ // Otherwise, we will generate unrolled scalar copies.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned src = MI->getOperand(1).getReg();
+ unsigned SizeVal = MI->getOperand(2).getImm();
+ unsigned Align = MI->getOperand(3).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ bool isThumb2 = Subtarget->isThumb2();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned ldrOpc, strOpc, UnitSize;
+
+ const TargetRegisterClass *TRC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+
+ if (Align & 1) {
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+ UnitSize = 1;
+ } else if (Align & 2) {
+ ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
+ strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
+ UnitSize = 2;
+ } else {
+ ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ UnitSize = 4;
+ }
+ unsigned BytesLeft = SizeVal % UnitSize;
+ unsigned LoopSize = SizeVal - BytesLeft;
+
+ if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
+ // Use LDR and STR to copy.
+ // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
+ // [destOut] = STR_POST(scratch, destIn, UnitSize)
+ unsigned srcIn = src;
+ unsigned destIn = dest;
+ for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addImm(UnitSize));
+ } else {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
+ .addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(UnitSize));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+
+ // Handle the leftover bytes with LDRB and STRB.
+ // [scratch, srcOut] = LDRB_POST(srcIn, 1)
+ // [destOut] = STRB_POST(scratch, destIn, 1)
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+ for (unsigned i = 0; i < BytesLeft; i++) {
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ } else {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+ MI->eraseFromParent(); // The instruction is gone now.
+ return BB;
+ }
+
+ // Expand the pseudo op to a loop.
+ // thisMBB:
+ // ...
+ // movw varEnd, # --> with thumb2
+ // movt varEnd, #
+ // ldrcp varEnd, idx --> without thumb2
+ // fallthrough --> loopMBB
+ // loopMBB:
+ // PHI varPhi, varEnd, varLoop
+ // PHI srcPhi, src, srcLoop
+ // PHI destPhi, dst, destLoop
+ // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+ // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
+ // subs varLoop, varPhi, #UnitSize
+ // bne loopMBB
+ // fallthrough --> exitMBB
+ // exitMBB:
+ // epilogue to handle left-over bytes
+ // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+ // [destOut] = STRB_POST(scratch, destLoop, 1)
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Load an immediate to varEnd.
+ unsigned varEnd = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ unsigned VReg1 = varEnd;
+ if ((LoopSize & 0xFFFF0000) != 0)
+ VReg1 = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(LoopSize & 0xFFFF));
+
+ if ((LoopSize & 0xFFFF0000) != 0)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
+ .addReg(VReg1)
+ .addImm(LoopSize >> 16));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getTargetData()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
+ .addReg(varEnd, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+ }
+ BB->addSuccessor(loopMBB);
+
+ // Generate the loop body:
+ // varPhi = PHI(varLoop, varEnd)
+ // srcPhi = PHI(srcLoop, src)
+ // destPhi = PHI(destLoop, dst)
+ MachineBasicBlock *entryBB = BB;
+ BB = loopMBB;
+ unsigned varLoop = MRI.createVirtualRegister(TRC);
+ unsigned varPhi = MRI.createVirtualRegister(TRC);
+ unsigned srcLoop = MRI.createVirtualRegister(TRC);
+ unsigned srcPhi = MRI.createVirtualRegister(TRC);
+ unsigned destLoop = MRI.createVirtualRegister(TRC);
+ unsigned destPhi = MRI.createVirtualRegister(TRC);
+
+ BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
+ .addReg(varLoop).addMBB(loopMBB)
+ .addReg(varEnd).addMBB(entryBB);
+ BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
+ .addReg(srcLoop).addMBB(loopMBB)
+ .addReg(src).addMBB(entryBB);
+ BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
+ .addReg(destLoop).addMBB(loopMBB)
+ .addReg(dest).addMBB(entryBB);
+
+ // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
+ // [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(scratch).addReg(destPhi)
+ .addImm(UnitSize));
+ } else {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
+ .addImm(UnitSize));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(scratch).addReg(destPhi)
+ .addReg(0).addImm(UnitSize));
+ }
+
+ // Decrement loop variable by UnitSize.
+ MachineInstrBuilder MIB = BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ // loopMBB can loop back to loopMBB or fall through to exitMBB.
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // Add epilogue to handle BytesLeft.
+ BB = exitMBB;
+ MachineInstr *StartOfExit = exitMBB->begin();
+ ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
+
+ // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
+ // [destOut] = STRB_POST(scratch, destLoop, 1)
+ unsigned srcIn = srcLoop;
+ unsigned destIn = destLoop;
+ for (unsigned i = 0; i < BytesLeft; i++) {
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addImm(1));
+ } else {
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
+ TII->get(ldrOpc),scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
+
+ AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
+ .addReg(scratch).addReg(destIn)
+ .addReg(0).addImm(1));
+ }
+ srcIn = srcOut;
+ destIn = destOut;
+ }
+
+ MI->eraseFromParent(); // The instruction is gone now.
+ return BB;
+}
+
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -6594,252 +6859,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// return last added BB
return SinkBB;
}
- case ARM::COPY_STRUCT_BYVAL_I32: {
+ case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
- // This pseudo instruction has 3 operands: dst, src, size
- // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
- // Otherwise, we will generate unrolled scalar copies.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned dest = MI->getOperand(0).getReg();
- unsigned src = MI->getOperand(1).getReg();
- unsigned size = MI->getOperand(2).getImm();
- DebugLoc dl = MI->getDebugLoc();
- unsigned BytesLeft = size & 3;
- unsigned LoopSize = size - BytesLeft;
-
- bool isThumb2 = Subtarget->isThumb2();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
- unsigned strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
-
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
-
- if (size <= Subtarget->getMaxInlineSizeThreshold()) {
- // Use LDR and STR to copy.
- // [scratch, srcOut] = LDR_POST(srcIn, 4)
- // [destOut] = STR_POST(scratch, destIn, 4)
- unsigned srcIn = src;
- unsigned destIn = dest;
- for (unsigned i = 0; i < LoopSize; i+=4) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
- unsigned srcOut = MRI.createVirtualRegister(TRC);
- unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(4));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(4));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(4));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(4));
- }
- srcIn = srcOut;
- destIn = destOut;
- }
-
- // Handle the leftover bytes with LDRB and STRB.
- // [scratch, srcOut] = LDRB_POST(srcIn, 1)
- // [destOut] = STRB_POST(scratch, destIn, 1)
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
- for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
- unsigned srcOut = MRI.createVirtualRegister(TRC);
- unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
- srcIn = srcOut;
- destIn = destOut;
- }
- MI->eraseFromParent(); // The instruction is gone now.
- return BB;
- }
-
- // Expand the pseudo op to a loop.
- // thisMBB:
- // ...
- // movw varEnd, # --> with thumb2
- // movt varEnd, #
- // ldrcp varEnd, idx --> without thumb2
- // fallthrough --> loopMBB
- // loopMBB:
- // PHI varPhi, varEnd, varLoop
- // PHI srcPhi, src, srcLoop
- // PHI destPhi, dst, destLoop
- // [scratch, srcLoop] = LDR_POST(srcPhi, 4)
- // [destLoop] = STR_POST(scratch, destPhi, 4)
- // subs varLoop, varPhi, #4
- // bne loopMBB
- // fallthrough --> exitMBB
- // exitMBB:
- // epilogue to handle left-over bytes
- // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
- // [destOut] = STRB_POST(scratch, destLoop, 1)
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loopMBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Load an immediate to varEnd.
- unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- unsigned VReg1 = varEnd;
- if ((LoopSize & 0xFFFF0000) != 0)
- VReg1 = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
- .addImm(LoopSize & 0xFFFF));
-
- if ((LoopSize & 0xFFFF0000) != 0)
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(VReg1)
- .addImm(LoopSize >> 16));
- } else {
- MachineConstantPool *ConstantPool = MF->getConstantPool();
- Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
- const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
-
- // MachineConstantPool wants an explicit alignment.
- unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
- if (Align == 0)
- Align = getTargetData()->getTypeAllocSize(C->getType());
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
- .addReg(varEnd, RegState::Define)
- .addConstantPoolIndex(Idx)
- .addImm(0));
- }
- BB->addSuccessor(loopMBB);
-
- // Generate the loop body:
- // varPhi = PHI(varLoop, varEnd)
- // srcPhi = PHI(srcLoop, src)
- // destPhi = PHI(destLoop, dst)
- MachineBasicBlock *entryBB = BB;
- BB = loopMBB;
- unsigned varLoop = MRI.createVirtualRegister(TRC);
- unsigned varPhi = MRI.createVirtualRegister(TRC);
- unsigned srcLoop = MRI.createVirtualRegister(TRC);
- unsigned srcPhi = MRI.createVirtualRegister(TRC);
- unsigned destLoop = MRI.createVirtualRegister(TRC);
- unsigned destPhi = MRI.createVirtualRegister(TRC);
-
- BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
- .addReg(varLoop).addMBB(loopMBB)
- .addReg(varEnd).addMBB(entryBB);
- BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
- .addReg(srcLoop).addMBB(loopMBB)
- .addReg(src).addMBB(entryBB);
- BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
- .addReg(destLoop).addMBB(loopMBB)
- .addReg(dest).addMBB(entryBB);
-
- // [scratch, srcLoop] = LDR_POST(srcPhi, 4)
- // [destLoop] = STR_POST(scratch, destPhi, 4)
- unsigned scratch = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(4));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addImm(4));
- } else {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0).addImm(4));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addReg(0).addImm(4));
- }
-
- // Decrement loop variable by 4.
- MachineInstrBuilder MIB = BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
- AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(4)));
- MIB->getOperand(5).setReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
-
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
-
- // loopMBB can loop back to loopMBB or fall through to exitMBB.
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // Add epilogue to handle BytesLeft.
- BB = exitMBB;
- MachineInstr *StartOfExit = exitMBB->begin();
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
-
- // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
- // [destOut] = STRB_POST(scratch, destLoop, 1)
- unsigned srcIn = srcLoop;
- unsigned destIn = destLoop;
- for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
- unsigned srcOut = MRI.createVirtualRegister(TRC);
- unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
- srcIn = srcOut;
- destIn = destOut;
- }
-
- MI->eraseFromParent(); // The instruction is gone now.
- return BB;
- }
+ return EmitStructByval(MI, BB);
}
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index d11171ef11..a113878978 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -530,6 +530,9 @@ namespace llvm {
MachineBasicBlock *MBB) const;
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
};
enum NEONModImmType {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index f700ec6eec..cf7d07c1d3 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -18,9 +18,9 @@
// Type profiles.
def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
-def SDT_ARMStructByVal : SDTypeProfile<0, 3,
+def SDT_ARMStructByVal : SDTypeProfile<0, 4,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>,
- SDTCisVT<2, i32>]>;
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
@@ -4174,9 +4174,9 @@ let usesCustomInserter = 1 in {
let usesCustomInserter = 1 in {
def COPY_STRUCT_BYVAL_I32 : PseudoInst<
- (outs), (ins GPR:$dst, GPR:$src, i32imm:$size),
+ (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
NoItinerary,
- [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size)]>;
+ [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
}
let mayLoad = 1 in {