aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Grosbach <grosbach@apple.com>2010-03-24 22:31:46 +0000
committerJim Grosbach <grosbach@apple.com>2010-03-24 22:31:46 +0000
commit2676737e5ed3e4b5c89b4d06b60d998e9318eb73 (patch)
tree48ab73eca523ba3889720df84425b36b6be252fc
parent044be39090a702504c62fc0544fc977a6caa7112 (diff)
Make the use of the vmla and vmls VFP instructions controllable via cmd line.
Preliminary testing shows significant performance wins by not using these instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99436 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td14
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td2
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td8
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp5
-rw-r--r--lib/Target/ARM/ARMSubtarget.h5
5 files changed, 30 insertions, 4 deletions
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index f73707fa43..08ead22dc0 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1369,6 +1369,20 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{4} = op4;
}
+// Double precision, binary, VML[AS] (for additional predicate)
+class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-8} = 0b1011;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+ list<Predicate> Predicates = [HasVFP2, UseVMLx];
+}
+
+
// Single precision, unary
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 26a280697b..f2ab06f328 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -140,6 +140,8 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
def UseMovt : Predicate<"Subtarget->useMovt()">;
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseVMLx : Predicate<"Subtarget->useVMLx()">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index aca8230221..0458389286 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -545,7 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
// FP FMA Operations.
//
-def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b),
@@ -558,7 +558,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b),
@@ -571,7 +571,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
[(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)),
@@ -589,7 +589,7 @@ def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
-def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)),
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 2dad7f1106..6b4438d4c8 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -26,6 +26,10 @@ static cl::opt<bool>
UseNEONFP("arm-use-neon-fp",
cl::desc("Use NEON for single-precision FP"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+UseVMLxInstructions("arm-use-vmlx",
+ cl::desc("Use VFP vmla and vmls instructions"),
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
UseMOVT("arm-use-movt",
@@ -36,6 +40,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
: ARMArchVersion(V4)
, ARMFPUType(None)
, UseNEONForSinglePrecisionFP(UseNEONFP)
+ , UseVMLx(UseVMLxInstructions)
, IsThumb(isT)
, ThumbMode(Thumb1)
, PostRAScheduler(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 2dc81a4d6d..686684cab8 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -50,6 +50,10 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
+ /// UseVMLx - If the VFP2 instructions are available, indicates whether
+ /// the VML[AS] instructions should be used.
+ bool UseVMLx;
+
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
bool IsThumb;
@@ -119,6 +123,7 @@ protected:
bool hasNEON() const { return ARMFPUType >= NEON; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
+ bool useVMLx() const {return hasVFP2() && UseVMLx; }
bool hasFP16() const { return HasFP16; }