Call libc memcpy/memset if the array size is bigger than the threshold.

Copying a 100MB array (after a warmup) shows that the glibc 2.6.1 implementation on x86-64 (Core 2) is 30% faster (from 0.270917s to 0.188079s).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@41479 91177308-0d34-0410-b5e6-96231b3b80d8
author: Rafael Espindola <rafael.espindola@gmail.com> 2007-08-27 10:18:20 +0000
committer: Rafael Espindola <rafael.espindola@gmail.com> 2007-08-27 10:18:20 +0000
commit: 6b83b5d1ae07dcd1c5987f7548ed2cf4be73b6a1 (patch)
tree: c66d22904e779c42f651ed9057b19a632f7fa62f
parent: 9f528e628090ee0ffca35d4577c23b6544f9a119 (diff)
2 files changed, 20 insertions, 18 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6673c5f639..75fbd4490a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3753,10 +3753,10 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
   if (Align == 0) Align = 1;
 
   ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
-  // If not DWORD aligned, call memset if size is less than the threshold.
+  // If not DWORD aligned or size is more than the threshold, call memset.
   // It knows how to align to the right boundary first.
   if ((Align & 3) != 0 ||
-      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
+      (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
     MVT::ValueType IntPtr = getPointerTy();
     const Type *IntPtrTy = getTargetData()->getIntPtrType();
     TargetLowering::ArgListTy Args; 
@@ -3909,10 +3909,10 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
   if (Align == 0) Align = 1;
 
   ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
-  // If not DWORD aligned, call memcpy if size is less than the threshold.
+  // If not DWORD aligned or size is more than the threshold, call memcpy.
   // It knows how to align to the right boundary first.
   if ((Align & 3) != 0 ||
-      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
+      (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
     MVT::ValueType IntPtr = getPointerTy();
     TargetLowering::ArgListTy Args;
     TargetLowering::ArgListEntry Entry;
diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.llx b/test/CodeGen/X86/2004-02-12-Memcpy.llx
index 8cd9a50cbb..56b8d3ba46 100644
--- a/test/CodeGen/X86/2004-02-12-Memcpy.llx
+++ b/test/CodeGen/X86/2004-02-12-Memcpy.llx
@@ -1,24 +1,26 @@
-; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 | grep movs
-declare void %llvm.memcpy.i32(sbyte* %A, sbyte* %B, uint %amt, uint %align)
+; RUN: llvm-as < %s | llc -march=x86 | grep movs | count 1
+; RUN: llvm-as < %s | llc -march=x86 | grep memcpy | count 2
 
-%A = global [1000 x int] zeroinitializer
-%B = global [1000 x int] zeroinitializer
+@A = global [32 x i32] zeroinitializer
+@B = global [32 x i32] zeroinitializer
 
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
 
-void %main() {
+define void @main() {
   ; dword copy
-  call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*),
-                           sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*),
-                           uint 4000, uint 4)
+  call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
+                           i8* bitcast ([32 x i32]* @B to i8*),
+                           i32 128, i32 4 )
 
   ; word copy
-  call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*),
-                           sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*),
-                           uint 4000, uint 2)
+  call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
+                           i8* bitcast ([32 x i32]* @B to i8*),
+                           i32 128, i32 2 )
 
   ; byte copy
-  call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*),
-                           sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*),
-                           uint 4000, uint 1)
+  call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
+                           i8* bitcast ([32 x i32]* @B to i8*),
+                            i32 128, i32 1 )
+
   ret void
 }
author	Rafael Espindola <rafael.espindola@gmail.com>	2007-08-27 10:18:20 +0000
committer	Rafael Espindola <rafael.espindola@gmail.com>	2007-08-27 10:18:20 +0000
commit	6b83b5d1ae07dcd1c5987f7548ed2cf4be73b6a1 (patch)
tree	c66d22904e779c42f651ed9057b19a632f7fa62f
parent	9f528e628090ee0ffca35d4577c23b6544f9a119 (diff)