aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChad Rosier <mcrosier@apple.com>2011-11-17 07:15:58 +0000
committerChad Rosier <mcrosier@apple.com>2011-11-17 07:15:58 +0000
commit478b06c9801f0b75c5216ce0886ac3d630d4fc7b (patch)
tree54fa90701be13d4ab0be4cfb05352cfec4de49be
parentec43d1f553cb440df1b435d3798063d0cba6a117 (diff)
When fast-isel selects a GEP, accumulate the offset rather than emitting a series of
ADDs. MaxOffs is used as a threshold to limit the size of the offset. The tradeoffs are: (1) If we can't materialize the large constant, then we'll cause fast-isel to bail. (2) Too large an offset can't be directly encoded in the ADD, resulting in a MOV+ADD. This is generally not a bad thing, because otherwise we would have had ADD+ADD, but on Thumb this turns into a MOVS+MOVT+ADD. Working on a fix for that. (3) Conversely, with too low a threshold we'll miss opportunities to coalesce ADDs. rdar://10412592 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144886 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp46
-rw-r--r--test/CodeGen/ARM/fast-isel-GEP-coalesce.ll65
2 files changed, 100 insertions, 11 deletions
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index f0fe179e05..cff37c2c86 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -437,6 +437,11 @@ bool FastISel::SelectGetElementPtr(const User *I) {
bool NIsKill = hasTrivialKill(I->getOperand(0));
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
+ uint64_t TotalOffs = 0;
+ // FIXME: What's a good SWAG number for MaxOffs?
+ uint64_t MaxOffs = 2048;
Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
@@ -446,14 +451,15 @@ bool FastISel::SelectGetElementPtr(const User *I) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
- uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
- // FIXME: This can be optimized by combining the add with a
- // subsequent one.
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
- NIsKill = true;
+ TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
}
Ty = StTy->getElementType(Field);
} else {
@@ -462,14 +468,26 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
- uint64_t Offs =
+ // N = N + Offset
+ TotalOffs +=
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ continue;
+ }
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
NIsKill = true;
- continue;
+ TotalOffs = 0;
}
// N = N + Idx * ElementSize;
@@ -494,6 +512,12 @@ bool FastISel::SelectGetElementPtr(const User *I) {
return false;
}
}
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
// We successfully emitted code for the given LLVM Instruction.
UpdateValueMap(I, N);
diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
new file mode 100644
index 0000000000..dbb634df0a
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+
+%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
+%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] }
+
+@arr = common global [2 x [2 x [2 x [2 x [2 x i32]]]]] zeroinitializer, align 4
+@A = common global [3 x [3 x %struct.A]] zeroinitializer, align 4
+@B = common global [2 x [2 x [2 x %struct.B]]] zeroinitializer, align 4
+
+define i32* @t1() nounwind {
+entry:
+; ARM: t1
+; THUMB: t1
+ %addr = alloca i32*, align 4
+ store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
+; ARM: add r0, r0, #124
+; THUMB: adds r0, #124
+ %0 = load i32** %addr, align 4
+ ret i32* %0
+}
+
+define i32* @t2() nounwind {
+entry:
+; ARM: t2
+; THUMB: t2
+ %addr = alloca i32*, align 4
+ store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
+; ARM: movw r1, #1148
+; ARM: add r0, r0, r1
+; THUMB: addw r0, r0, #1148
+ %0 = load i32** %addr, align 4
+ ret i32* %0
+}
+
+define i32* @t3() nounwind {
+entry:
+; ARM: t3
+; THUMB: t3
+ %addr = alloca i32*, align 4
+ store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
+; ARM: add r0, r0, #140
+; THUMB: adds r0, #140
+ %0 = load i32** %addr, align 4
+ ret i32* %0
+}
+
+define i32* @t4() nounwind {
+entry:
+; ARM: t4
+; THUMB: t4
+ %addr = alloca i32*, align 4
+ store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
+; ARM-NOT: movw r{{[0-9]}}, #1060
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #132
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #36
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
+; ARM: movw r{{[0-9]}}, #1284
+; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284
+ %0 = load i32** %addr, align 4
+ ret i32* %0
+}