From 9be8b4fc92e1ace819a78db512c1f945c1471be7 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 15 Feb 2013 23:18:01 +0000 Subject: Reinitialize the ivars in the subtarget. When we're recalculating the feature set of the subtarget, we need to have the ivars in their initial state. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175320 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 84 +++++++++++++++------------- lib/Target/ARM/ARMSubtarget.h | 4 +- lib/Target/X86/X86Subtarget.cpp | 72 +++++++++++++----------- lib/Target/X86/X86Subtarget.h | 4 +- test/CodeGen/X86/subtarget-feature-change.ll | 66 ++++++++++++++++++++++ 5 files changed, 156 insertions(+), 74 deletions(-) create mode 100644 test/CodeGen/X86/subtarget-feature-change.ll diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index c33bb9d5bc..e11314d4fc 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -45,51 +45,55 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) - , HasV4TOps(false) - , HasV5TOps(false) - , HasV5TEOps(false) - , HasV6Ops(false) - , HasV6T2Ops(false) - , HasV7Ops(false) - , HasVFPv2(false) - , HasVFPv3(false) - , HasVFPv4(false) - , HasNEON(false) - , UseNEONForSinglePrecisionFP(false) - , UseMulOps(UseFusedMulOps) - , SlowFPVMLx(false) - , HasVMLxForwarding(false) - , SlowFPBrcc(false) - , InThumbMode(false) - , HasThumb2(false) - , IsMClass(false) - , NoARM(false) - , PostRAScheduler(false) - , IsR9Reserved(ReserveR9) - , UseMovt(false) - , SupportsTailCall(false) - , HasFP16(false) - , HasD16(false) - , HasHardwareDivide(false) - , HasHardwareDivideInARM(false) - , HasT2ExtractPack(false) - , HasDataBarrier(false) - , Pref32BitThumb(false) - , AvoidCPSRPartialUpdate(false) - , AvoidMOVsShifterOperand(false) - , HasRAS(false) - , HasMPExtension(false) - , FPOnlySP(false) - , AllowsUnalignedMem(false) - , Thumb2DSP(false) - , UseNaClTrap(false) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) , TargetABI(ARM_ABI_APCS) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } +void ARMSubtarget::initializeEnvironment() { + HasV4TOps = false; + HasV5TOps = false; + HasV5TEOps = false; + HasV6Ops = false; + HasV6T2Ops = false; + HasV7Ops = false; + HasVFPv2 = false; + HasVFPv3 = false; + HasVFPv4 = false; + HasNEON = false; + UseNEONForSinglePrecisionFP = false; + UseMulOps = UseFusedMulOps; + SlowFPVMLx = false; + HasVMLxForwarding = false; + SlowFPBrcc = false; + InThumbMode = false; + HasThumb2 = false; + IsMClass = false; + NoARM = false; + PostRAScheduler = false; + IsR9Reserved = ReserveR9; + UseMovt = false; + SupportsTailCall = false; + HasFP16 = false; + HasD16 = false; + HasHardwareDivide = false; + HasHardwareDivideInARM = false; + HasT2ExtractPack = false; + HasDataBarrier = false; + Pref32BitThumb = false; + AvoidCPSRPartialUpdate = false; + AvoidMOVsShifterOperand = false; + HasRAS = false; + HasMPExtension = false; + FPOnlySP = false; + AllowsUnalignedMem = false; + Thumb2DSP = false; + UseNaClTrap = false; +} + void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { AttributeSet FnAttrs = MF->getFunction()->getAttributes(); Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, @@ -100,8 +104,10 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) + if (!FS.empty()) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + } } void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 87834b8b66..f47555c035 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -204,8 +204,10 @@ protected: /// \brief Reset the features for the X86 target. virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); - +public: void computeIssueWidth(); bool hasV4TOps() const { return HasV4TOps; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 58e0d06dfb..6391acfa80 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -336,8 +336,10 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) + if (!FS.empty()) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + } } void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -417,46 +419,50 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { stackAlignment = 16; } +void X86Subtarget::initializeEnvironment() { + PICStyle = PICStyles::None; + X86SSELevel = NoMMXSSE; + X863DNowLevel = NoThreeDNow; + HasCMov = false; + HasX86_64 = false; + HasPOPCNT = false; + HasSSE4A = false; + HasAES = false; + HasPCLMUL = false; + HasFMA = false; + HasFMA4 = false; + HasXOP = false; + HasMOVBE = false; + HasRDRAND = false; + HasF16C = false; + HasFSGSBase = false; + HasLZCNT = false; + HasBMI = false; + HasBMI2 = false; + HasRTM = false; + HasADX = false; + IsBTMemSlow = false; + IsUAMemFast = false; + HasVectorUAMem = false; + HasCmpxchg16b = false; + UseLeaForSP = false; + HasSlowDivide = false; + PostRAScheduler = false; + PadShortFunctions = false; + stackAlignment = 4; + // FIXME: this is a known good value for Yonah. How about others? + MaxInlineSizeThreshold = 128; +} + X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, unsigned StackAlignOverride, bool is64Bit) : X86GenSubtargetInfo(TT, CPU, FS) , X86ProcFamily(Others) - , PICStyle(PICStyles::None) - , X86SSELevel(NoMMXSSE) - , X863DNowLevel(NoThreeDNow) - , HasCMov(false) - , HasX86_64(false) - , HasPOPCNT(false) - , HasSSE4A(false) - , HasAES(false) - , HasPCLMUL(false) - , HasFMA(false) - , HasFMA4(false) - , HasXOP(false) - , HasMOVBE(false) - , HasRDRAND(false) - , HasF16C(false) - , HasFSGSBase(false) - , HasLZCNT(false) - , HasBMI(false) - , HasBMI2(false) - , HasRTM(false) - , HasADX(false) - , IsBTMemSlow(false) - , IsUAMemFast(false) - , HasVectorUAMem(false) - , HasCmpxchg16b(false) - , UseLeaForSP(false) - , HasSlowDivide(false) - , PostRAScheduler(false) - , PadShortFunctions(false) - , stackAlignment(4) - // FIXME: this is a known good value for Yonah. How about others? - , MaxInlineSizeThreshold(128) , TargetTriple(TT) , StackAlignOverride(StackAlignOverride) , In64BitMode(is64Bit) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index d1c706725d..e97da4b6f4 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -201,8 +201,10 @@ public: /// \brief Reset the features for the X86 target. virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); - +public: /// Is this x86_64? (disregarding specific ABI / programming model) bool is64Bit() const { return In64BitMode; diff --git a/test/CodeGen/X86/subtarget-feature-change.ll b/test/CodeGen/X86/subtarget-feature-change.ll new file mode 100644 index 0000000000..cd677294c6 --- /dev/null +++ b/test/CodeGen/X86/subtarget-feature-change.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; This should not generate SSE instructions: +; +; CHECK: without.sse: +; CHECK: flds +; CHECK: fmuls +; CHECK: fstps +define void @without.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #0 { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4, !tbaa !0 + %mul = fmul float %0, %1 + %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv + store float %mul, float* %arrayidx4, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +; This should generate SSE instructions: +; +; CHECK: with.sse +; CHECK: movss +; CHECK: mulss +; CHECK: movss +define void @with.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #1 { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4, !tbaa !0 + %mul = fmul float %0, %1 + %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv + store float %mul, float* %arrayidx4, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +attributes #0 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,-sse,-avx,-sse41,-ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,-sse2,-sse3" } +attributes #1 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,+sse,-avx,-sse41,+ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,+sse2,+sse3" } + +!0 = metadata !{metadata !"float", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} -- cgit v1.2.3-18-g5258