aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2011-01-11 01:07:24 +0000
committerChandler Carruth <chandlerc@gmail.com>2011-01-11 01:07:24 +0000
commit15ed90c859e5df11112c614c83d0d2e786d4c73a (patch)
tree45d675682bdddfd8af9af853064ad81b762f2bb6
parentf7b0047f5f4d3525486d8fc139f05bddc4265d01 (diff)
Teach constant folding to perform conversions from constant floating
point values to their integer representation through the SSE intrinsic calls. This is the last part of a README.txt entry for which I have real world examples. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123206 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Analysis/ConstantFolding.cpp56
-rw-r--r--lib/Target/README.txt55
-rw-r--r--test/Transforms/ConstProp/calls.ll33
3 files changed, 89 insertions, 55 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 1b38c027da..300821026a 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1047,6 +1047,14 @@ llvm::canConstantFoldCallTo(const Function *F) {
case Intrinsic::smul_with_overflow:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
return true;
default:
return false;
@@ -1116,6 +1124,36 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
return 0; // dummy return to suppress warning
}
+/// ConstantFoldConvertToInt - Attempt to fold an SSE floating point to integer
+/// conversion of a constant floating point. If roundTowardZero is false, the
+/// default IEEE rounding is used (toward nearest, ties to even). This matches
+/// the behavior of the non-truncating SSE instructions in the default rounding
+/// mode. The desired integer type Ty is used to select how many bits are
+/// available for the result. Returns null if the conversion reports any status
+/// other than opOK or opInexact, otherwise returns the signed ConstantInt of
+/// type Ty holding the converted value.
+static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
+ const Type *Ty) {
+ assert(Op && "Called with NULL operand");
+ APFloat Val(Op->getValueAPF());
+
+ // All of these conversion intrinsics produce an integer of at most 64 bits.
+ unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
+ assert(ResultWidth <= 64 &&
+ "Can only constant fold conversions to 64 and 32 bit ints");
+
+ uint64_t UIntVal;
+ bool isExact = false;
+ APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
+ : APFloat::rmNearestTiesToEven;
+ APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
+ /*isSigned=*/true, mode,
+ &isExact);
+ if (status != APFloat::opOK && status != APFloat::opInexact)
+ return 0;
+ return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
+}
+
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
Constant *
@@ -1246,6 +1284,24 @@ llvm::ConstantFoldCall(Function *F,
}
}
+ if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+ return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty);
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+ return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty);
+ }
+ }
+
if (isa<UndefValue>(Operands[0])) {
if (F->getIntrinsicID() == Intrinsic::bswap)
return Operands[0];
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 194a19219c..c3a9330ba6 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2259,58 +2259,3 @@ Since we know that x+2.0 doesn't care about the sign of any zeros in X, we can
transform the fmul to 0.0, and then the fadd to 2.0.
//===---------------------------------------------------------------------===//
-
-clang -O3 currently compiles this code:
-
-#include <emmintrin.h>
-int f(double x) { return _mm_cvtsd_si32(_mm_set_sd(x)); }
-int g(double x) { return _mm_cvttsd_si32(_mm_set_sd(x)); }
-
-into
-
-define i32 @_Z1fd(double %x) nounwind readnone {
-entry:
- %vecinit.i = insertelement <2 x double> undef, double %x, i32 0
- %vecinit1.i = insertelement <2 x double> %vecinit.i, double 0.000000e+00,i32 1
- %0 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %vecinit1.i) nounwind
- ret i32 %0
-}
-
-define i32 @_Z1gd(double %x) nounwind readnone {
-entry:
- %conv.i = fptosi double %x to i32
- ret i32 %conv.i
-}
-
-This difference carries over to the assembly produced, resulting in:
-
-_Z1fd: # @_Z1fd
-# BB#0: # %entry
- pushq %rbp
- movq %rsp, %rbp
- xorps %xmm1, %xmm1
- movsd %xmm0, %xmm1
- cvtsd2sil %xmm1, %eax
- popq %rbp
- ret
-
-_Z1gd: # @_Z1gd
-# BB#0: # %entry
- pushq %rbp
- movq %rsp, %rbp
- cvttsd2si %xmm0, %eax
- popq %rbp
- ret
-
-The problem is that we can't see through the intrinsic call used for cvtsd2si,
-and fold away the unnecessary manipulation of the function parameter. When
-these functions are inlined, it forms a barrier preventing many further
-optimizations. LLVM IR doesn't have a good way to model the logic of
-'cvtsd2si', its only FP -> int conversion path forces truncation. We should add
-a rounding flag onto fptosi so that it can represent this type of rounding
-naturally in the IR rather than using intrinsics. We might need to use a
-'system_rounding_mode' flag to encode that the semantics of the rounding mode
-can be changed by the program, but ideally we could just say that isn't
-supported, and hard code the rounding.
-
-//===---------------------------------------------------------------------===//
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index a12fc82d64..82d73245ad 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -21,3 +21,36 @@ define double @T() {
%c = fadd double %b, %D
ret double %c
}
+
+define i1 @test_sse_cvt() nounwind readnone {
+; CHECK: @test_sse_cvt
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+ %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+ %i1 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+ %i2 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+ %i3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+ %i4 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> <double 1.75, double undef>) nounwind
+ %i5 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> <double 1.75, double undef>) nounwind
+ %i6 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> <double 1.75, double undef>) nounwind
+ %i7 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> <double 1.75, double undef>) nounwind
+ %sum11 = add i32 %i0, %i1
+ %sum12 = add i32 %i4, %i5
+ %sum1 = add i32 %sum11, %sum12
+ %sum21 = add i64 %i2, %i3
+ %sum22 = add i64 %i6, %i7
+ %sum2 = add i64 %sum21, %sum22
+ %sum1.sext = sext i32 %sum1 to i64
+ %b = icmp eq i64 %sum1.sext, %sum2
+ ret i1 %b
+}
+
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone