aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/X86/InstSelectSimple.cpp
diff options
context:
space:
mode:
author: Chris Lattner <sabre@nondot.org> 2004-02-25 07:00:55 +0000
committer: Chris Lattner <sabre@nondot.org> 2004-02-25 07:00:55 +0000
commit: 5f2c7b1975273c0cdd72d86f0b0a408d7d40137e (patch)
tree: bc9025415e09bbb825462c3c5285cb0b6f0ab438 /lib/Target/X86/InstSelectSimple.cpp
parent: b6bac51351d2a1a9db76381b92c40ec24cc59e8e (diff)
Teach the instruction selector how to transform 'array' GEP computations into X86
scaled indexes. This allows us to compile GEPs like this:

int* %test([10 x { int, { int } }]* %X, int %Idx) {
        %Idx = cast int %Idx to long
        %X = getelementptr [10 x { int, { int } }]* %X, long 0, long %Idx, ubyte 1, ubyte 0
        ret int* %X
}

Into a single address computation:

test:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
        lea %EAX, DWORD PTR [%EAX + 8*%ECX + 4]
        ret

Before it generated:

test:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
        shl %ECX, 3
        add %EAX, %ECX
        lea %EAX, DWORD PTR [%EAX + 4]
        ret

This is useful for things like int/float/double arrays, as the indexing can be
folded into the loads&stores, reducing register pressure and decreasing the
pressure on the decode unit. With these changes, I expect our performance on
256.bzip2 and gzip to improve a lot. On bzip2 for example, we go from this:

10665 asm-printer           - Number of machine instrs printed
   40 ra-local              - Number of loads/stores folded into instructions
 1708 ra-local              - Number of loads added
 1532 ra-local              - Number of stores added
 1354 twoaddressinstruction - Number of instructions added
 1354 twoaddressinstruction - Number of two-address instructions
 2794 x86-peephole          - Number of peephole optimization performed

to this:

 9873 asm-printer           - Number of machine instrs printed
   41 ra-local              - Number of loads/stores folded into instructions
 1710 ra-local              - Number of loads added
 1521 ra-local              - Number of stores added
  789 twoaddressinstruction - Number of instructions added
  789 twoaddressinstruction - Number of two-address instructions
 2142 x86-peephole          - Number of peephole optimization performed

... and these types of instructions are often in tight loops.

Linear scan is also helped, but not as much. It goes from:

 8787 asm-printer           - Number of machine instrs printed
 2389 liveintervals         - Number of identity moves eliminated after coalescing
 2288 liveintervals         - Number of interval joins performed
 3522 liveintervals         - Number of intervals after coalescing
 5810 liveintervals         - Number of original intervals
  700 spiller               - Number of loads added
  487 spiller               - Number of stores added
  303 spiller               - Number of register spills
 1354 twoaddressinstruction - Number of instructions added
 1354 twoaddressinstruction - Number of two-address instructions
  363 x86-peephole          - Number of peephole optimization performed

to:

 7982 asm-printer           - Number of machine instrs printed
 1759 liveintervals         - Number of identity moves eliminated after coalescing
 1658 liveintervals         - Number of interval joins performed
 3282 liveintervals         - Number of intervals after coalescing
 4940 liveintervals         - Number of original intervals
  635 spiller               - Number of loads added
  452 spiller               - Number of stores added
  288 spiller               - Number of register spills
  789 twoaddressinstruction - Number of instructions added
  789 twoaddressinstruction - Number of two-address instructions
  258 x86-peephole          - Number of peephole optimization performed

Though I'm not complaining about the drop in the number of intervals. :)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@11820 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/InstSelectSimple.cpp')
-rw-r--r-- lib/Target/X86/InstSelectSimple.cpp | 47
1 files changed, 23 insertions, 24 deletions
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index d6e3a75186..d109d061fc 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -2438,11 +2438,30 @@ void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
assert(idx->getType() == Type::LongTy && "Bad GEP array index!");
// If idx is a constant, fold it into the offset.
+ unsigned TypeSize = TD.getTypeSize(SqTy->getElementType());
if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
- Disp += TD.getTypeSize(SqTy->getElementType())*CSI->getValue();
+ Disp += TypeSize*CSI->getValue();
} else {
- // If we can't handle it, return.
- return;
+ // If the index reg is already taken, we can't handle this index.
+ if (IndexReg) return;
+
+ // If this is a size that we can handle, then add the index as a scaled index register.
+ switch (TypeSize) {
+ case 1: case 2: case 4: case 8:
+ // These are all acceptable scales on X86.
+ Scale = TypeSize;
+ break;
+ default:
+ // Otherwise, we can't handle this scale
+ return;
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(idx))
+ if (CI->getOperand(0)->getType() == Type::IntTy ||
+ CI->getOperand(0)->getType() == Type::UIntTy)
+ idx = CI->getOperand(0);
+
+ IndexReg = MBB ? getReg(idx, MBB, IP) : 1;
}
GEPOps.pop_back(); // Consume a GEP operand
@@ -2456,7 +2475,7 @@ void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
// FIXME: When addressing modes are more powerful/correct, we could load
// global addresses directly as 32-bit immediates.
assert(BaseReg == 0);
- BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 0;
+ BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 1;
GEPOps.pop_back(); // Consume the last GEP operand
}
@@ -2538,26 +2557,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
}
break; // we are now done
- } else if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
- // It's a struct access. CUI is the index into the structure,
- // which names the field. This index must have unsigned type.
- const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
- GEPOps.pop_back(); // Consume a GEP operand
- GEPTypes.pop_back();
-
- // Use the TargetData structure to pick out what the layout of the
- // structure is in memory. Since the structure index must be constant, we
- // can get its value and use it to find the right byte offset from the
- // StructLayout class's list of structure member offsets.
- unsigned idxValue = CUI->getValue();
- unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
- if (FieldOff) {
- unsigned Reg = makeAnotherReg(Type::UIntTy);
- // Emit an ADD to add FieldOff to the basePtr.
- BMI(MBB, IP, X86::ADDri32, 2, TargetReg).addReg(Reg).addZImm(FieldOff);
- --IP; // Insert the next instruction before this one.
- TargetReg = Reg; // Codegen the rest of the GEP into this
- }
} else {
// It's an array or pointer access: [ArraySize x ElementType].
const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());