diff options
-rw-r--r-- | lib/Target/Mips/MipsISelLowering.cpp | 99 | ||||
-rw-r--r-- | test/CodeGen/Mips/unalignedload.ll | 31 |
2 files changed, 101 insertions, 29 deletions
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 97b5a145ab..f2682cfd24 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1805,43 +1805,90 @@ WriteByValArg(SDValue& Chain, DebugLoc dl, SmallVector<SDValue, 8>& MemOpChains, int& LastFI, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, - MVT PtrType) { - unsigned FirstWord = VA.getLocMemOffset() / 4; - unsigned NumWords = (Flags.getByValSize() + 3) / 4; - unsigned LastWord = FirstWord + NumWords; - unsigned CurWord; + MVT PtrType, bool isLittle) { + unsigned LocMemOffset = VA.getLocMemOffset(); + unsigned Offset = 0; + uint32_t RemainingSize = Flags.getByValSize(); unsigned ByValAlign = Flags.getByValAlign(); - // copy the first 4 words of byval arg to registers A0 - A3 - for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize); - ++CurWord) { + // Copy the first 4 words of byval arg to registers A0 - A3. + // FIXME: Use a stricter alignment if it enables better optimization in passes + // run later. + for (; RemainingSize >= 4 && LocMemOffset < 4 * 4; + Offset += 4, RemainingSize -= 4, LocMemOffset += 4) { SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant((CurWord - FirstWord) * 4, - MVT::i32)); + DAG.getConstant(Offset, MVT::i32)); SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr, MachinePointerInfo(), false, false, std::min(ByValAlign, (unsigned )4)); MemOpChains.push_back(LoadVal.getValue(1)); - unsigned DstReg = O32IntRegs[CurWord]; + unsigned DstReg = O32IntRegs[LocMemOffset / 4]; RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); } - // copy remaining part of byval arg to stack. - if (CurWord < LastWord) { - unsigned SizeInBytes = (LastWord - CurWord) * 4; - SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant((CurWord - FirstWord) * 4, - MVT::i32)); - LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); - Chain = DAG.getMemcpy(Chain, dl, Dst, Src, - DAG.getConstant(SizeInBytes, MVT::i32), - /*Align*/ByValAlign, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); - MemOpChains.push_back(Chain); + if (RemainingSize == 0) + return; + + // If there still is a register available for argument passing, write the + // remaining part of the structure to it using subword loads and shifts. + if (LocMemOffset < 4 * 4) { + assert(RemainingSize <= 3 && RemainingSize >= 1 && + "There must be one to three bytes remaining."); + unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize); + SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + unsigned Alignment = std::min(ByValAlign, (unsigned )4); + SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, + LoadPtr, MachinePointerInfo(), + MVT::getIntegerVT(LoadSize * 8), false, + false, Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + + // If target is big endian, shift it to the most significant half-word or + // byte. + if (!isLittle) + LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal, + DAG.getConstant(32 - LoadSize * 8, MVT::i32)); + + Offset += LoadSize; + RemainingSize -= LoadSize; + + // Read second subword if necessary. + if (RemainingSize != 0) { + assert(RemainingSize == 1 && "There must be one byte remaining."); + LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + unsigned Alignment = std::min(ByValAlign, (unsigned )2); + SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, + LoadPtr, MachinePointerInfo(), + MVT::i8, false, false, Alignment); + MemOpChains.push_back(Subword.getValue(1)); + // Insert the loaded byte to LoadVal. + // FIXME: Use INS if supported by target. + unsigned ShiftAmt = isLittle ? 16 : 8; + SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword, + DAG.getConstant(ShiftAmt, MVT::i32)); + LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift); + } + + unsigned DstReg = O32IntRegs[LocMemOffset / 4]; + RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); + return; } + + // Create a fixed object on stack at offset LocMemOffset and copy + // remaining part of byval arg to it using memcpy. + SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true); + SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); + Chain = DAG.getMemcpy(Chain, dl, Dst, Src, + DAG.getConstant(RemainingSize, MVT::i32), + std::min(ByValAlign, (unsigned)4), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); } /// LowerCall - functions arguments are copied from virtual regs to @@ -1974,7 +2021,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg, - VA, Flags, getPointerTy()); + VA, Flags, getPointerTy(), Subtarget->isLittle()); continue; } diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll index 072f0574e7..433e896d19 100644 --- a/test/CodeGen/Mips/unalignedload.ll +++ b/test/CodeGen/Mips/unalignedload.ll @@ -1,16 +1,41 @@ -; RUN: llc -march=mips < %s | FileCheck %s - +; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL +; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB %struct.S2 = type { %struct.S1, %struct.S1 } %struct.S1 = type { i8, i8 } +%struct.S4 = type { [7 x i8] } @s2 = common global %struct.S2 zeroinitializer, align 1 +@s4 = common global %struct.S4 zeroinitializer, align 1 define void @foo1() nounwind { entry: -; CHECK: ulw ${{[0-9]+}}, 2 +; CHECK-EL: lw $25, %call16(foo2) +; CHECK-EL: ulhu $4, 2 +; CHECK-EL: lw $[[R0:[0-9]+]], %got(s4) +; CHECK-EL: lbu $[[R1:[0-9]+]], 6($[[R0]]) +; CHECK-EL: ulhu $[[R2:[0-9]+]], 4($[[R0]]) +; CHECK-EL: sll $[[R3:[0-9]+]], $[[R1]], 16 +; CHECK-EL: ulw $4, 0($[[R0]]) +; CHECK-EL: lw $25, %call16(foo4) +; CHECK-EL: or $5, $[[R2]], $[[R3]] + +; CHECK-EB: ulhu $[[R0:[0-9]+]], 2 +; CHECK-EB: lw $25, %call16(foo2) +; CHECK-EB: sll $4, $[[R0]], 16 +; CHECK-EB: lw $[[R1:[0-9]+]], %got(s4) +; CHECK-EB: ulhu $[[R2:[0-9]+]], 4($[[R1]]) +; CHECK-EB: lbu $[[R3:[0-9]+]], 6($[[R1]]) +; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16 +; CHECK-EB: sll $[[R5:[0-9]+]], $[[R3]], 8 +; CHECK-EB: ulw $4, 0($[[R1]]) +; CHECK-EB: lw $25, %call16(foo4) +; CHECK-EB: or $5, $[[R4]], $[[R5]] tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind + tail call void @foo4(%struct.S4* byval @s4) nounwind ret void } declare void @foo2(%struct.S1* byval) + +declare void @foo4(%struct.S4* byval) |