aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Collingbourne <peter@pcc.me.uk>2011-10-27 19:19:51 +0000
committerPeter Collingbourne <peter@pcc.me.uk>2011-10-27 19:19:51 +0000
commitc5096cbf7a42e0f9012945b00d9037a5b5a88d72 (patch)
tree84a9e447bd9b0917cdf7c57a07984d9c0b68bb76
parent9d31fa75bc05fe4cb903a7701550f22cfb73ea8b (diff)
Annotate imprecise FP division with fpaccuracy metadata
The OpenCL single precision division operation is only required to be accurate to 2.5ulp. Annotate the fdiv instruction with metadata which signals to the backend that an imprecise divide instruction may be used. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@143136 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/CGExpr.cpp16
-rw-r--r--lib/CodeGen/CGExprScalar.cpp14
-rw-r--r--lib/CodeGen/CodeGenFunction.h5
-rw-r--r--test/CodeGenOpenCL/fpaccuracy.cl25
4 files changed, 58 insertions, 2 deletions
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index bd4e553991..9ad3ae8352 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -23,6 +23,7 @@
#include "clang/AST/DeclObjC.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Target/TargetData.h"
using namespace clang;
using namespace CodeGen;
@@ -2752,3 +2753,18 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
return RValue::get(0);
return ConvertTempToRValue(*this, E->getType(), OrigDest);
}
+
+void CodeGenFunction::SetFPAccuracy(llvm::Value *Val, unsigned AccuracyN,
+ unsigned AccuracyD) {
+ assert(Val->getType()->isFPOrFPVectorTy());
+ if (!AccuracyN || !isa<llvm::Instruction>(Val))
+ return;
+
+ llvm::Value *Vals[2];
+ Vals[0] = llvm::ConstantInt::get(Int32Ty, AccuracyN);
+ Vals[1] = llvm::ConstantInt::get(Int32Ty, AccuracyD);
+ llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(), Vals);
+
+ cast<llvm::Instruction>(Val)->setMetadata(llvm::LLVMContext::MD_fpaccuracy,
+ Node);
+}
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 582d1c4572..25b4a0a0e7 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -1772,8 +1772,18 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
Builder.SetInsertPoint(DivCont);
}
}
- if (Ops.LHS->getType()->isFPOrFPVectorTy())
- return Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
+ if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
+ llvm::Value *Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
+ if (CGF.getContext().getLangOptions().OpenCL) {
+ // OpenCL 1.1 7.4: minimum accuracy of single precision / is 2.5ulp
+ llvm::Type *ValTy = Val->getType();
+ if (ValTy->isFloatTy() ||
+ (isa<llvm::VectorType>(ValTy) &&
+ cast<llvm::VectorType>(ValTy)->getElementType()->isFloatTy()))
+ CGF.SetFPAccuracy(Val, 5, 2);
+ }
+ return Val;
+ }
else if (Ops.Ty->hasUnsignedIntegerRepresentation())
return Builder.CreateUDiv(Ops.LHS, Ops.RHS, "div");
else
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 858962d337..4940e21c8b 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2382,6 +2382,11 @@ public:
/// a r-value suitable for passing the given parameter.
void EmitDelegateCallArg(CallArgList &args, const VarDecl *param);
+ /// SetFPAccuracy - Set the minimum required accuracy of the given floating
+ /// point operation, expressed as the maximum relative error in ulp.
+ void SetFPAccuracy(llvm::Value *Val, unsigned AccuracyN,
+ unsigned AccuracyD = 1);
+
private:
void EmitReturnOfRValue(RValue RV, QualType Ty);
diff --git a/test/CodeGenOpenCL/fpaccuracy.cl b/test/CodeGenOpenCL/fpaccuracy.cl
new file mode 100644
index 0000000000..47fca696f9
--- /dev/null
+++ b/test/CodeGenOpenCL/fpaccuracy.cl
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s
+
+typedef __attribute__(( ext_vector_type(4) )) float float4;
+
+float spscalardiv(float a, float b) {
+ // CHECK: @spscalardiv
+ // CHECK: fdiv{{.*}}, !fpaccuracy ![[MD:[0-9]+]]
+ return a / b;
+}
+
+float4 spvectordiv(float4 a, float4 b) {
+ // CHECK: @spvectordiv
+ // CHECK: fdiv{{.*}}, !fpaccuracy ![[MD]]
+ return a / b;
+}
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+double dpscalardiv(double a, double b) {
+ // CHECK: @dpscalardiv
+ // CHECK-NOT: !fpaccuracy
+ return a / b;
+}
+
+// CHECK: ![[MD]] = metadata !{i{{[0-9]+}} 5, i{{[0-9]+}} 2}