diff options
-rw-r--r-- | lib/CodeGen/TwoAddressInstructionPass.cpp | 14 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 25 | ||||
-rw-r--r-- | test/CodeGen/X86/2010-07-02-UnfoldBug.ll | 99 |
3 files changed, 126 insertions, 12 deletions
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 0c97dad48b..62fa0fdb77 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -926,14 +926,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; - bool Success = - TII->unfoldMemoryOperand(MF, mi, Reg, - /*UnfoldLoad=*/true, /*UnfoldStore=*/false, - NewMIs); - (void)Success; - assert(Success && - "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " - "succeeded!"); + if (!TII->unfoldMemoryOperand(MF, mi, Reg, + /*UnfoldLoad=*/true,/*UnfoldStore=*/false, + NewMIs)) { + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + return false; + } assert(NewMIs.size() == 2 && "Unfolded a load into multiple instructions!"); // The load was previously folded, so this is the only use. diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index caa623399d..c1d66cb570 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2159,7 +2159,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = (*MMOBegin)->getAlignment() >= 16; + bool isAligned = *MMOBegin && (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); @@ -2189,7 +2189,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = (*MMOBegin)->getAlignment() >= 16; + bool isAligned = *MMOBegin && (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); @@ -2693,6 +2693,13 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, const TargetInstrDesc &TID = get(Opc); const TargetOperandInfo &TOI = TID.OpInfo[Index]; const TargetRegisterClass *RC = TOI.getRegClass(&RI); + if (!MI->hasOneMemOperand() && + RC == &X86::VR128RegClass && + !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + // Without memoperands, loadRegFromAddr and storeRegToStackSlot will + // conservatively assume the address is unaligned. That's bad for + // performance. + return false; SmallVector<MachineOperand, X86AddrNumOperands> AddrOps; SmallVector<MachineOperand,2> BeforeOps; SmallVector<MachineOperand,2> AfterOps; @@ -2834,7 +2841,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, MachineInstr::mmo_iterator> MMOs = MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), cast<MachineSDNode>(N)->memoperands_end()); - bool isAligned = (*MMOs.first)->getAlignment() >= 16; + if (!(*MMOs.first) && + RC == &X86::VR128RegClass && + !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + // Do not introduce a slow unaligned load. + return false; + bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); @@ -2871,7 +2883,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, MachineInstr::mmo_iterator> MMOs = MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), cast<MachineSDNode>(N)->memoperands_end()); - bool isAligned = (*MMOs.first)->getAlignment() >= 16; + if (!(*MMOs.first) && + RC == &X86::VR128RegClass && + !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + // Do not introduce a slow unaligned store. + return false; + bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), dl, MVT::Other, diff --git a/test/CodeGen/X86/2010-07-02-UnfoldBug.ll b/test/CodeGen/X86/2010-07-02-UnfoldBug.ll new file mode 100644 index 0000000000..79219dcfe6 --- /dev/null +++ b/test/CodeGen/X86/2010-07-02-UnfoldBug.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin +; rdar://8154265 + +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + +define void @_ZN2CA3OGL20fill_surface_mesh_3dERNS0_7ContextEPKNS_6Render13MeshTransformEPKNS0_5LayerEPNS0_7SurfaceEfNS0_13TextureFilterESC_f() nounwind optsize ssp { +entry: + br i1 undef, label %bb2.thread, label %bb2 + +bb2.thread: ; preds = %entry + br i1 undef, label %bb41, label %bb10.preheader + +bb2: ; preds = %entry + unreachable + +bb10.preheader: ; preds = %bb2.thread + br i1 undef, label %bb9, label %bb12 + +bb9: ; preds = %bb9, %bb10.preheader + br i1 undef, label %bb9, label %bb12 + +bb12: ; preds = %bb9, %bb10.preheader + br i1 undef, label %bb4.i.i, label %bb3.i.i + +bb3.i.i: ; preds = %bb12 + unreachable + +bb4.i.i: ; preds = %bb12 + br i1 undef, label %bb8.i.i, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + +bb8.i.i: ; preds = %bb4.i.i + br i1 undef, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit, label %bb9.i.i + +bb9.i.i: ; preds = %bb8.i.i + br i1 undef, label %bb11.i.i, label %bb10.i.i + +bb10.i.i: ; preds = %bb9.i.i + unreachable + +bb11.i.i: ; preds = %bb9.i.i + unreachable + +_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit: ; preds = %bb8.i.i, %bb4.i.i + br i1 undef, label %bb19, label %bb14 + +bb14: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + unreachable + +bb19: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + br i1 undef, label %bb.i50, label %bb6.i + +bb.i50: ; preds = %bb19 + unreachable + +bb6.i: ; preds = %bb19 + br i1 undef, label %bb28, label %bb.nph106 + +bb22: ; preds = %bb24.preheader + br i1 undef, label %bb2.i.i, label %bb.i.i49 + +bb.i.i49: ; preds = %bb22 + %0 = load float* undef, align 4 ; <float> [#uses=1] + %1 = insertelement <4 x float> undef, float %0, i32 0 ; <<4 x float>> [#uses=1] + %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> %1) nounwind readnone ; <<4 x float>> [#uses=1] + %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %2, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1] + %4 = extractelement <4 x float> %3, i32 0 ; <float> [#uses=1] + store float %4, float* undef, align 4 + %5 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> undef) nounwind readnone ; <<4 x float>> [#uses=1] + %6 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %5, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1] + %7 = extractelement <4 x float> %6, i32 0 ; <float> [#uses=1] + store float %7, float* undef, align 4 + unreachable + +bb2.i.i: ; preds = %bb22 + unreachable + +bb26.loopexit: ; preds = %bb24.preheader + br i1 undef, label %bb28, label %bb24.preheader + +bb.nph106: ; preds = %bb6.i + br label %bb24.preheader + +bb24.preheader: ; preds = %bb.nph106, %bb26.loopexit + br i1 undef, label %bb22, label %bb26.loopexit + +bb28: ; preds = %bb26.loopexit, %bb6.i + unreachable + +bb41: ; preds = %bb2.thread + br i1 undef, label %return, label %bb46 + +bb46: ; preds = %bb41 + ret void + +return: ; preds = %bb41 + ret void +} |