1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
|
//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass duplicates basic blocks ending in unconditional branches into
// the tails of their predecessors.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "tailduplication"
#include "llvm/Function.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
// Pass statistics. NumInstrDups is decremented by a block's size and
// NumDeadBlocks is incremented whenever TailDuplicateBlocks removes a block
// that is left without predecessors.
STATISTIC(NumTailDups , "Number of tail duplicated blocks");
STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
// Heuristic for tail duplication.
// Hidden command-line option "tail-dup-size" (default 2). TailDuplicate uses
// it as the instruction-count limit for a candidate block unless the block
// ends in an indirect branch or the function is marked optimize-for-size,
// in which case fixed limits are used instead.
static cl::opt<unsigned>
TailDuplicateSize("tail-dup-size",
cl::desc("Maximum instructions to consider tail duplicating"),
cl::init(2), cl::Hidden);
// AvailableValsTy - List of virtual register numbers; used as the mapped type
// of TailDuplicatePass::SSAUpdateVals.
typedef std::vector<unsigned> AvailableValsTy;
namespace {
/// TailDuplicatePass - Perform tail duplication.
class TailDuplicatePass : public MachineFunctionPass {
const TargetInstrInfo *TII;
MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
// SSAUpdateVRs - A list of virtual registers for which to update SSA form.
SmallVector<unsigned, 16> SSAUpdateVRs;
// SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
// source virtual registers.
DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
public:
static char ID;
explicit TailDuplicatePass() : MachineFunctionPass(&ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "Tail Duplication"; }
private:
void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg);
bool TailDuplicateBlocks(MachineFunction &MF);
bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF);
void RemoveDeadBlock(MachineBasicBlock *MBB);
};
char TailDuplicatePass::ID = 0;
}
/// createTailDuplicatePass - Factory for the tail duplication pass. Returns a
/// freshly heap-allocated TailDuplicatePass as a FunctionPass pointer.
FunctionPass *llvm::createTailDuplicatePass() {
  FunctionPass *NewPass = new TailDuplicatePass();
  return NewPass;
}
/// runOnMachineFunction - Pass entry point. Caches the target instruction
/// info, register info and (if available) module info for MF, then keeps
/// calling TailDuplicateBlocks until a sweep reports no change.
/// Returns true if any sweep changed the function.
bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
  TII = MF.getTarget().getInstrInfo();
  MRI = &MF.getRegInfo();
  MMI = getAnalysisIfAvailable<MachineModuleInfo>();

  // Iterate to a fixed point: every successful sweep may expose new
  // candidates, so stop only once a sweep makes no change.
  bool Changed = false;
  while (TailDuplicateBlocks(MF))
    Changed = true;

  return Changed;
}
/// TailDuplicateBlocks - Look for small blocks that are unconditionally
/// branched to and do not fall through. Tail-duplicate their instructions
/// into their predecessors to eliminate (dynamic) branches.
///
/// Performs a single sweep over every block of MF except the first one,
/// removes blocks that end up without predecessors, and finally repairs SSA
/// form for every virtual register recorded through AddSSAUpdateEntry.
/// Returns true if any block was duplicated or removed.
bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
  bool MadeChange = false;

  // Each sweep starts with an empty set of pending SSA repairs.
  SSAUpdateVRs.clear();
  SSAUpdateVals.clear();

  for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
    // Advance the iterator first: MBB may be removed further down.
    MachineBasicBlock *MBB = I++;

    // Only duplicate blocks that end with unconditional branches.
    if (MBB->canFallThrough())
      continue;

    MadeChange |= TailDuplicate(MBB, MF);

    // If it is dead, remove it.
    if (MBB->pred_empty()) {
      // The removed instructions no longer count as extra duplicates.
      NumInstrDups -= MBB->size();
      RemoveDeadBlock(MBB);
      MadeChange = true;
      ++NumDeadBlocks;
    }
  }

  if (!SSAUpdateVRs.empty()) {
    // Update SSA form.
    MachineSSAUpdater SSAUpdate(MF);
    for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
      unsigned VReg = SSAUpdateVRs[i];
      SSAUpdate.Initialize(VReg);

      // If the original definition is still around, add it as an available
      // value.
      MachineInstr *DefMI = MRI->getVRegDef(VReg);
      MachineBasicBlock *DefBB = 0;
      if (DefMI) {
        DefBB = DefMI->getParent();
        SSAUpdate.AddAvailableValue(DefBB, VReg);
      }

      // Add the new vregs as available values. AddSSAUpdateEntry inserts into
      // SSAUpdateVals whenever it appends to SSAUpdateVRs, so the lookup is
      // expected to succeed for every VReg visited here.
      DenseMap<unsigned, AvailableValsTy>::iterator LI =
        SSAUpdateVals.find(VReg);
      for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
        unsigned NewReg = LI->second[j];
        MachineInstr *NewDefMI = MRI->getVRegDef(NewReg);
        SSAUpdate.AddAvailableValue(NewDefMI->getParent(), NewReg);
      }

      // Rewrite uses that are outside of the original def's block.
      // Rewriting a use is expected to change the operand's register, which
      // unlinks the operand from VReg's use list. Capture the operand and
      // step the iterator past it *before* rewriting so the walk stays on
      // VReg's list.
      MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
      while (UI != MRI->use_end()) {
        MachineOperand &UseMO = UI.getOperand();
        MachineInstr *UseMI = &*UI;
        ++UI;
        if (UseMI->getParent() == DefBB)
          continue;
        SSAUpdate.RewriteUse(UseMO);
      }
    }
  }

  return MadeChange;
}
static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
const MachineRegisterInfo *MRI) {
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
UE = MRI->use_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
if (UseMI->getParent() != BB)
return true;
}
return false;
}
/// getPHISrcRegOpIdx - Walk MI's operands two at a time starting at index 1
/// and return the first index Idx whose following operand (Idx + 1) is the
/// block SrcBB. Returns 0 if no such pair exists.
/// NOTE(review): presumably MI is a PHI whose operands after the def are
/// (register, predecessor block) pairs -- confirm against callers.
static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
  const unsigned NumOps = MI->getNumOperands();
  unsigned Idx = 1;
  while (Idx != NumOps) {
    if (MI->getOperand(Idx + 1).getMBB() == SrcBB)
      return Idx;
    Idx += 2;
  }
  return 0;
}
/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
/// SSA update.
void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg) {
DenseMap<unsigned, AvailableValsTy>::iterator LI =
SSAUpdateVals.find(OrigReg);
if (LI != SSAUpdateVals.end())
LI->second.push_back(NewReg);
else {
AvailableValsTy Vals;
Vals.push_back(NewReg);
SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
SSAUpdateVRs.push_back(OrigReg);
}
}
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
MachineFunction &MF) {
// Don't try to tail-duplicate single-block loops.
if (TailBB->isSuccessor(TailBB))
return false;
// Set the limit on the number of instructions to duplicate, with a default
// of one less than the tail-merge threshold. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
unsigned MaxDuplicateCount;
if (!TailBB->empty() && TailBB->back().getDesc().isIndirectBranch())
// If the target has hardware branch prediction that can handle indirect
// branches, duplicating them can often make them predictable when there
// are common paths through the code. The limit needs to be high enough
// to allow undoing the effects of tail merging.
MaxDuplicateCount = 20;
else if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
MaxDuplicateCount = 1;
else
MaxDuplicateCount = TailDuplicateSize;
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
bool HasCall = false;
for (MachineBasicBlock::iterator I = TailBB->begin();
I != TailBB->end(); ++I) {
// Non-duplicable things shouldn't be tail-duplicated.
if (I->getDesc().isNotDuplicable()) return false;
// Don't duplicate more than the threshold.
if (InstrCount == MaxDuplicateCount) return false;
// Remember if we saw a call.
if (I->getDesc().isCall()) HasCall = true;
if (I->getOpcode() != TargetInstrInfo::PHI)
InstrCount += 1;
}
// Heuristically, don't tail-duplicate calls if it would expand code size,
// as it's less likely to be worth the extra cost.
if (InstrCount > 1 && HasCall)
return false;
// Iterate through all the unique predecessors and tail-duplicate this
// block into them, if possible. Copying the list ahead of time also
// avoids trouble with the predecessor list reallocating.
bool Changed = false;
SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
TailBB->pred_end());
for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
|