aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBill Wendling <isanbard@gmail.com>2013-02-15 23:18:01 +0000
committerBill Wendling <isanbard@gmail.com>2013-02-15 23:18:01 +0000
commit9be8b4fc92e1ace819a78db512c1f945c1471be7 (patch)
tree90507303bf41bb899a4274fdd27b75521c1d62c3
parentf78708593407286de34506e699da25a56b65a20d (diff)
Reinitialize the ivars in the subtarget.
When we're recalculating the feature set of the subtarget, we need to have the ivars in their initial state. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175320 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp84
-rw-r--r--lib/Target/ARM/ARMSubtarget.h4
-rw-r--r--lib/Target/X86/X86Subtarget.cpp72
-rw-r--r--lib/Target/X86/X86Subtarget.h4
-rw-r--r--test/CodeGen/X86/subtarget-feature-change.ll66
5 files changed, 156 insertions, 74 deletions
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index c33bb9d5bc..e11314d4fc 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -45,51 +45,55 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
- , HasV4TOps(false)
- , HasV5TOps(false)
- , HasV5TEOps(false)
- , HasV6Ops(false)
- , HasV6T2Ops(false)
- , HasV7Ops(false)
- , HasVFPv2(false)
- , HasVFPv3(false)
- , HasVFPv4(false)
- , HasNEON(false)
- , UseNEONForSinglePrecisionFP(false)
- , UseMulOps(UseFusedMulOps)
- , SlowFPVMLx(false)
- , HasVMLxForwarding(false)
- , SlowFPBrcc(false)
- , InThumbMode(false)
- , HasThumb2(false)
- , IsMClass(false)
- , NoARM(false)
- , PostRAScheduler(false)
- , IsR9Reserved(ReserveR9)
- , UseMovt(false)
- , SupportsTailCall(false)
- , HasFP16(false)
- , HasD16(false)
- , HasHardwareDivide(false)
- , HasHardwareDivideInARM(false)
- , HasT2ExtractPack(false)
- , HasDataBarrier(false)
- , Pref32BitThumb(false)
- , AvoidCPSRPartialUpdate(false)
- , AvoidMOVsShifterOperand(false)
- , HasRAS(false)
- , HasMPExtension(false)
- , FPOnlySP(false)
- , AllowsUnalignedMem(false)
- , Thumb2DSP(false)
- , UseNaClTrap(false)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
, TargetABI(ARM_ABI_APCS) {
+ initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
}
+void ARMSubtarget::initializeEnvironment() {
+ HasV4TOps = false;
+ HasV5TOps = false;
+ HasV5TEOps = false;
+ HasV6Ops = false;
+ HasV6T2Ops = false;
+ HasV7Ops = false;
+ HasVFPv2 = false;
+ HasVFPv3 = false;
+ HasVFPv4 = false;
+ HasNEON = false;
+ UseNEONForSinglePrecisionFP = false;
+ UseMulOps = UseFusedMulOps;
+ SlowFPVMLx = false;
+ HasVMLxForwarding = false;
+ SlowFPBrcc = false;
+ InThumbMode = false;
+ HasThumb2 = false;
+ IsMClass = false;
+ NoARM = false;
+ PostRAScheduler = false;
+ IsR9Reserved = ReserveR9;
+ UseMovt = false;
+ SupportsTailCall = false;
+ HasFP16 = false;
+ HasD16 = false;
+ HasHardwareDivide = false;
+ HasHardwareDivideInARM = false;
+ HasT2ExtractPack = false;
+ HasDataBarrier = false;
+ Pref32BitThumb = false;
+ AvoidCPSRPartialUpdate = false;
+ AvoidMOVsShifterOperand = false;
+ HasRAS = false;
+ HasMPExtension = false;
+ FPOnlySP = false;
+ AllowsUnalignedMem = false;
+ Thumb2DSP = false;
+ UseNaClTrap = false;
+}
+
void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
AttributeSet FnAttrs = MF->getFunction()->getAttributes();
Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
@@ -100,8 +104,10 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
!CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : "";
std::string FS =
!FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
- if (!FS.empty())
+ if (!FS.empty()) {
+ initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
+ }
}
void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 87834b8b66..f47555c035 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -204,8 +204,10 @@ protected:
/// \brief Reset the features for the X86 target.
virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
void resetSubtargetFeatures(StringRef CPU, StringRef FS);
-
+public:
void computeIssueWidth();
bool hasV4TOps() const { return HasV4TOps; }
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 58e0d06dfb..6391acfa80 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -336,8 +336,10 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) {
!CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : "";
std::string FS =
!FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
- if (!FS.empty())
+ if (!FS.empty()) {
+ initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
+ }
}
void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -417,46 +419,50 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
stackAlignment = 16;
}
+void X86Subtarget::initializeEnvironment() {
+ PICStyle = PICStyles::None;
+ X86SSELevel = NoMMXSSE;
+ X863DNowLevel = NoThreeDNow;
+ HasCMov = false;
+ HasX86_64 = false;
+ HasPOPCNT = false;
+ HasSSE4A = false;
+ HasAES = false;
+ HasPCLMUL = false;
+ HasFMA = false;
+ HasFMA4 = false;
+ HasXOP = false;
+ HasMOVBE = false;
+ HasRDRAND = false;
+ HasF16C = false;
+ HasFSGSBase = false;
+ HasLZCNT = false;
+ HasBMI = false;
+ HasBMI2 = false;
+ HasRTM = false;
+ HasADX = false;
+ IsBTMemSlow = false;
+ IsUAMemFast = false;
+ HasVectorUAMem = false;
+ HasCmpxchg16b = false;
+ UseLeaForSP = false;
+ HasSlowDivide = false;
+ PostRAScheduler = false;
+ PadShortFunctions = false;
+ stackAlignment = 4;
+ // FIXME: this is a known good value for Yonah. How about others?
+ MaxInlineSizeThreshold = 128;
+}
+
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
const std::string &FS,
unsigned StackAlignOverride, bool is64Bit)
: X86GenSubtargetInfo(TT, CPU, FS)
, X86ProcFamily(Others)
- , PICStyle(PICStyles::None)
- , X86SSELevel(NoMMXSSE)
- , X863DNowLevel(NoThreeDNow)
- , HasCMov(false)
- , HasX86_64(false)
- , HasPOPCNT(false)
- , HasSSE4A(false)
- , HasAES(false)
- , HasPCLMUL(false)
- , HasFMA(false)
- , HasFMA4(false)
- , HasXOP(false)
- , HasMOVBE(false)
- , HasRDRAND(false)
- , HasF16C(false)
- , HasFSGSBase(false)
- , HasLZCNT(false)
- , HasBMI(false)
- , HasBMI2(false)
- , HasRTM(false)
- , HasADX(false)
- , IsBTMemSlow(false)
- , IsUAMemFast(false)
- , HasVectorUAMem(false)
- , HasCmpxchg16b(false)
- , UseLeaForSP(false)
- , HasSlowDivide(false)
- , PostRAScheduler(false)
- , PadShortFunctions(false)
- , stackAlignment(4)
- // FIXME: this is a known good value for Yonah. How about others?
- , MaxInlineSizeThreshold(128)
, TargetTriple(TT)
, StackAlignOverride(StackAlignOverride)
, In64BitMode(is64Bit) {
+ initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index d1c706725d..e97da4b6f4 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -201,8 +201,10 @@ public:
/// \brief Reset the features for the X86 target.
virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
void resetSubtargetFeatures(StringRef CPU, StringRef FS);
-
+public:
/// Is this x86_64? (disregarding specific ABI / programming model)
bool is64Bit() const {
return In64BitMode;
diff --git a/test/CodeGen/X86/subtarget-feature-change.ll b/test/CodeGen/X86/subtarget-feature-change.ll
new file mode 100644
index 0000000000..cd677294c6
--- /dev/null
+++ b/test/CodeGen/X86/subtarget-feature-change.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; This should not generate SSE instructions:
+;
+; CHECK: without.sse:
+; CHECK: flds
+; CHECK: fmuls
+; CHECK: fstps
+define void @without.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #0 {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !tbaa !0
+ %mul = fmul float %0, %1
+ %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+ store float %mul, float* %arrayidx4, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+; This should generate SSE instructions:
+;
+; CHECK: with.sse
+; CHECK: movss
+; CHECK: mulss
+; CHECK: movss
+define void @with.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #1 {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !tbaa !0
+ %mul = fmul float %0, %1
+ %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+ store float %mul, float* %arrayidx4, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+attributes #0 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,-sse,-avx,-sse41,-ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,-sse2,-sse3" }
+attributes #1 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,+sse,-avx,-sse41,+ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,+sse2,+sse3" }
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}