aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp24
-rw-r--r--lib/Target/X86/README-X86-64.txt44
-rw-r--r--test/CodeGen/X86/memset-2.ll9
3 files changed, 30 insertions, 47 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b639b7c9cf..110812c437 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3527,16 +3527,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SmallVector<SDValue, 8> OutChains;
uint64_t DstOff = 0;
unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
- unsigned VTSize = VT.getSizeInBits() / 8;
- SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+
+ // If this store is smaller than the largest store see whether we can get
+ // the smaller value for free with a truncate.
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
DstPtrInfo.getWithOffset(DstOff),
isVol, false, Align);
OutChains.push_back(Store);
- DstOff += VTSize;
+ DstOff += VT.getSizeInBits() / 8;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index 78c4dc00ee..e21d69a7bc 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -41,50 +41,6 @@ saved a few instructions.
//===---------------------------------------------------------------------===//
-Poor codegen:
-
-int X[2];
-int b;
-void test(void) {
- memset(X, b, 2*sizeof(X[0]));
-}
-
-llc:
- movq _b@GOTPCREL(%rip), %rax
- movzbq (%rax), %rax
- movq %rax, %rcx
- shlq $8, %rcx
- orq %rax, %rcx
- movq %rcx, %rax
- shlq $16, %rax
- orq %rcx, %rax
- movq %rax, %rcx
- shlq $32, %rcx
- movq _X@GOTPCREL(%rip), %rdx
- orq %rax, %rcx
- movq %rcx, (%rdx)
- ret
-
-gcc:
- movq _b@GOTPCREL(%rip), %rax
- movabsq $72340172838076673, %rdx
- movzbq (%rax), %rax
- imulq %rdx, %rax
- movq _X@GOTPCREL(%rip), %rdx
- movq %rax, (%rdx)
- ret
-
-And the codegen is even worse for the following
-(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
- void fill1(char *s, int a)
- {
- __builtin_memset(s, a, 15);
- }
-
-For this version, we duplicate the computation of the constant to store.
-
-//===---------------------------------------------------------------------===//
-
It's not possible to reference AH, BH, CH, and DH registers in an instruction
requiring REX prefix. However, divb and mulb both produce results in AH. If isel
emits a CopyFromReg which gets turned into a movb and that can be allocated a
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 11f619cd6d..ae6b6e9772 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -28,3 +28,12 @@ entry:
; CHECK: imull $16843009
}
+define void @t4(i8* nocapture %s, i8 %a) nounwind {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
+ ret void
+; CHECK: t4:
+; CHECK: imull $16843009
+; CHECK-NOT: imul
+; CHECK: ret
+}