diff options
-rw-r--r-- | include/llvm/IntrinsicsX86.td | 8 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 12 | ||||
-rw-r--r-- | test/CodeGen/X86/2009-02-25-CommuteBug.ll | 14 |
3 files changed, 22 insertions, 12 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index b435d8e689..37ba59c921 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -19,13 +19,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse_add_ss : GCCBuiltin<"__builtin_ia32_addss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_v4f32_ty], [IntrNoMem, Commutative]>; + llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_sub_ss : GCCBuiltin<"__builtin_ia32_subss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_mul_ss : GCCBuiltin<"__builtin_ia32_mulss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_v4f32_ty], [IntrNoMem, Commutative]>; + llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_div_ss : GCCBuiltin<"__builtin_ia32_divss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; @@ -176,13 +176,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_add_sd : GCCBuiltin<"__builtin_ia32_addsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_v2f64_ty], [IntrNoMem, Commutative]>; + llvm_v2f64_ty], [IntrNoMem]>; def int_x86_sse2_sub_sd : GCCBuiltin<"__builtin_ia32_subsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_sse2_mul_sd : GCCBuiltin<"__builtin_ia32_mulsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_v2f64_ty], [IntrNoMem, Commutative]>; + llvm_v2f64_ty], [IntrNoMem]>; def int_x86_sse2_div_sd : GCCBuiltin<"__builtin_ia32_divsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 407b4f1b06..3e00c3b4a0 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -526,7 +526,7 @@ def FsANDNPSrm : PSI<0x55, MRMSrcMem, /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the /// plain scalar form, in that it takes an entire vector (instead of a scalar) -/// and leaves the top elements undefined. +/// and leaves the top elements unmodified (therefore these cannot be commuted). /// /// These three forms can each be reg+reg or reg+mem, so there are a total of /// six "instructions". @@ -566,9 +566,7 @@ multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr, def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } + [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]>; // Intrinsic operation, reg+mem. def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), @@ -1275,7 +1273,7 @@ def FsANDNPDrm : PDI<0x55, MRMSrcMem, /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the /// plain scalar form, in that it takes an entire vector (instead of a scalar) -/// and leaves the top elements undefined. +/// and leaves the top elements unmodified (therefore these cannot be commuted). /// /// These three forms can each be reg+reg or reg+mem, so there are a total of /// six "instructions". @@ -1315,9 +1313,7 @@ multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr, def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } + [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>; // Intrinsic operation, reg+mem. def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll new file mode 100644 index 0000000000..b772bf8ec3 --- /dev/null +++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll @@ -0,0 +1,14 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& not grep commuted +; rdar://6608609 + +define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone { +entry: + %tmp.i2 = bitcast <2 x double> %B to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp2.i = or <2 x i64> %tmp.i2, <i64 4607632778762754458, i64 4607632778762754458> ; <<2 x i64>> [#uses=1] + %tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double> ; <<2 x double>> [#uses=1] + %0 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %A, <2 x double> %tmp3.i) nounwind readnone ; <<2 x double>> [#uses=1] + %tmp.i = add <2 x double> %0, %C ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp.i +} + +declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone |