1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
|
//===- LowerEmSetjmp - Lower setjmp/longjmp for Emscripten/JS -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Lowers setjmp to a reasonably-performant approach for emscripten. The idea
// is that each block with a setjmp is broken up into the part right after
// the setjmp, and a new basic block is added which is either reached from
// the setjmp, or later from a longjmp. To handle the longjmp, all calls that
// might longjmp are checked immediately afterwards.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/NaCl.h"
#include <vector>
#include <set>
#include "llvm/Support/raw_ostream.h"
#include <stdio.h>
// Ad-hoc debugging helpers. All of them write to stderr through fprintf.
//
// dump(x): prints x followed by a newline. x is pasted next to "\n", so it must be a
// string literal; it is also used as the format string, so any '%' in it is interpreted
// by fprintf.
#define dump(x) fprintf(stderr, x "\n")
// dumpv(x, ...): like dump, but x is a printf-style format literal and the remaining
// arguments supply its values. With no extra arguments the expansion leaves a trailing comma.
#define dumpv(x, ...) fprintf(stderr, x "\n", __VA_ARGS__)
// dumpfail(x): prints x, then the current "__FILE__ : __LINE__", then calls
// report_fatal_error("fail"). Expands to a brace-enclosed block, not a single expression.
#define dumpfail(x) { fprintf(stderr, x "\n"); fprintf(stderr, "%s : %d\n", __FILE__, __LINE__); report_fatal_error("fail"); }
// dumpfailv(x, ...): the printf-style variant of dumpfail.
#define dumpfailv(x, ...) { fprintf(stderr, x "\n", __VA_ARGS__); fprintf(stderr, "%s : %d\n", __FILE__, __LINE__); report_fatal_error("fail"); }
// dumpIR(value): debugging helper that prints the textual form of *value to stderr,
// followed by a newline. value is dereferenced, so it must be a non-null pointer (or
// pointer-like object) to something that can be streamed into a raw_ostream.
//
// The text is rendered into a local std::string through a raw_string_ostream. The stream
// is flushed explicitly before the string is read: a raw_string_ostream may keep pending
// output in its own buffer, in which case reading the string without a flush would print
// an empty or truncated dump.
//
// Expands to a brace-enclosed block, so the temporaries do not leak into the caller's scope.
#define dumpIR(value) { \
  std::string temp; \
  raw_string_ostream stream(temp); \
  stream << *(value); \
  stream.flush(); \
  fprintf(stderr, "%s\n", temp.c_str()); \
}
// Replace any previously defined assert with a local version built on dumpfail: when the
// condition is false it prints the stringified condition and the file/line, then calls
// report_fatal_error. This definition does not consult NDEBUG.
// The expansion is a brace-enclosed block rather than an expression.
// NOTE(review): the argument is only tested for truthiness, so call sites that pass a bare
// string literal (e.g. assert("TODO: ...")) can never fire, because a literal is never null.
#undef assert
#define assert(x) { if (!(x)) dumpfail(#x); }
using namespace llvm;
namespace {
class LowerEmSetjmp : public ModulePass {
Module *TheModule;
public:
static char ID; // Pass identification, replacement for typeid
explicit LowerEmSetjmp() : ModulePass(ID), TheModule(NULL) {
initializeLowerEmSetjmpPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M);
};
}
// Out-of-line definition of the pass identifier declared in the class; the constructor
// hands it to ModulePass.
char LowerEmSetjmp::ID = 0;
// Generates the pass registration code (including the initializeLowerEmSetjmpPass function
// the constructor calls) with the command-line name "loweremsetjmp" and a human-readable
// description.
// NOTE(review): the two trailing `false` arguments are presumably the "CFG only" and
// "is analysis" flags — confirm against the INITIALIZE_PASS definition.
INITIALIZE_PASS(LowerEmSetjmp, "loweremsetjmp",
"Lower setjmp and longjmp for js/emscripten",
false, false)
bool LowerEmSetjmp::runOnModule(Module &M) {
TheModule = &M;
Function *Setjmp = TheModule->getFunction("setjmp");
Function *Longjmp = TheModule->getFunction("longjmp");
if (!Setjmp && !Longjmp) return false;
Type *i1 = Type::getInt1Ty(M.getContext());
Type *i32 = Type::getInt32Ty(M.getContext());
Type *Void = Type::getVoidTy(M.getContext());
// Add functions
Function *EmSetjmp = NULL;
if (Setjmp) {
SmallVector<Type*, 2> EmSetjmpTypes;
EmSetjmpTypes.push_back(Setjmp->getFunctionType()->getParamType(0));
EmSetjmpTypes.push_back(i32); // extra param that says which setjmp in the function it is
FunctionType *EmSetjmpFunc = FunctionType::get(i32, EmSetjmpTypes, false);
EmSetjmp = Function::Create(EmSetjmpFunc, GlobalValue::ExternalLinkage, "emscripten_setjmp", TheModule);
}
Function *EmLongjmp = Longjmp ? Function::Create(Longjmp->getFunctionType(), GlobalValue::ExternalLinkage, "emscripten_longjmp", TheModule) : NULL;
SmallVector<Type*, 1> IntArgTypes;
IntArgTypes.push_back(i32);
FunctionType *IntIntFunc = FunctionType::get(i32, IntArgTypes, false);
Function *CheckLongjmp = Function::Create(IntIntFunc, GlobalValue::ExternalLinkage, "emscripten_check_longjmp", TheModule); // gets control flow
Function *GetLongjmpResult = Function::Create(IntIntFunc, GlobalValue::ExternalLinkage, "emscripten_get_longjmp_result", TheModule); // gets int value longjmp'd
FunctionType *VoidFunc = FunctionType::get(Void, false);
Function *PrepSetjmp = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_prep_setjmp", TheModule);
Function *PreInvoke = TheModule->getFunction("emscripten_preinvoke");
if (!PreInvoke) PreInvoke = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_preinvoke", TheModule);
FunctionType *IntFunc = FunctionType::get(i32, false);
Function *PostInvoke = TheModule->getFunction("emscripten_postinvoke");
if (!PostInvoke) PostInvoke = Function::Create(IntFunc, GlobalValue::ExternalLinkage, "emscripten_postinvoke", TheModule);
// Process all callers of setjmp and longjmp. Start with setjmp.
typedef std::vector<PHINode*> Phis;
typedef std::map<Function*, Phis> FunctionPhisMap;
FunctionPhisMap SetjmpOutputPhis;
if (Setjmp) {
for (Instruction::use_iterator UI = Setjmp->use_begin(), UE = Setjmp->use_end(); UI != UE; ++UI) {
Instruction *U = dyn_cast<Instruction>(*UI);
if (CallInst *CI = dyn_cast<CallInst>(U)) {
BasicBlock *SJBB = CI->getParent();
// The tail is everything right after the call, and will be reached once when setjmp is
// called, and later when longjmp returns to the setjmp
BasicBlock *Tail = SplitBlock(SJBB, CI->getNextNode(), this);
// Add a phi to the tail, which will be the output of setjmp, which indicates if this is the
// first call or a longjmp back. The phi directly uses the right value based on where we
// arrive from
PHINode *SetjmpOutput = PHINode::Create(i32, 2, "", Tail->getFirstNonPHI());
SetjmpOutput->addIncoming(ConstantInt::get(i32, 0), SJBB); // setjmp initial call returns 0
CI->replaceAllUsesWith(SetjmpOutput); // The proper output is now this, not the setjmp call itself
// longjmp returns to the setjmp will add themselves to this phi
Phis& P = SetjmpOutputPhis[SJBB->getParent()];
P.push_back(SetjmpOutput);
// fix call target
SmallVector<Value *, 2> Args;
Args.push_back(CI->getArgOperand(0));
Args.push_back(ConstantInt::get(i32, P.size())); // our index in the function is our place in the array + 1
CallInst::Create(EmSetjmp, Args, "", CI);
CI->eraseFromParent();
} else if (InvokeInst *CI = dyn_cast<InvokeInst>(U)) {
assert("TODO: invoke a setjmp");
} else {
dumpIR(U);
assert("bad use of setjmp, should only call it");
}
}
}
// Update longjmp FIXME: we could avoid throwing in longjmp as an optimization when longjmping back into the current function perhaps?
if (Longjmp) Longjmp->replaceAllUsesWith(EmLongjmp);
// Update all setjmping functions
for (FunctionPhisMap::iterator I = SetjmpOutputPhis.begin(); I != SetjmpOutputPhis.end(); I++) {
Function *F = I->first;
Phis& P = I->second;
CallInst::Create(PrepSetjmp, "", F->begin()->begin()); // FIXME: adding after other allocas might be better
// Update each call that can longjmp so it can return to a setjmp where relevant
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ) {
BasicBlock *BB = BBI++;
for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) {
Instruction *I = Iter++;
CallInst *CI;
if ((CI = dyn_cast<CallInst>(I))) {
Value *V = CI->getCalledValue();
if (V == PrepSetjmp || V == EmSetjmp || V == CheckLongjmp || V == GetLongjmpResult || V == PreInvoke || V == PostInvoke) continue;
if (Function *CF = dyn_cast<Function>(V)) if (CF->isIntrinsic()) continue;
// TODO: proper analysis of what can actually longjmp. Currently we assume anything but setjmp can.
// This may longjmp, so we need to check if it did. Split at that point, and
// envelop the call in pre/post invoke, if we need to
CallInst *After;
Instruction *Check = NULL;
if (Iter != E && (After = dyn_cast<CallInst>(Iter)) && After->getCalledValue() == PostInvoke) {
// use the pre|postinvoke that exceptions lowering already made
Check = Iter++;
}
BasicBlock *Tail = SplitBlock(BB, Iter, this); // Iter already points to the next instruction, as we need
TerminatorInst *TI = BB->getTerminator();
if (!Check) {
// no existing pre|postinvoke, create our own
CallInst::Create(PreInvoke, "", CI);
Check = CallInst::Create(PostInvoke, "", TI); // CI is at the end of the block
}
// We need to replace the terminator in Tail - SplitBlock makes BB go straight to Tail, we need to check if a longjmp occurred, and
// go to the right setjmp-tail if so
SmallVector<Value *, 1> Args;
Args.push_back(Check);
Instruction *LongjmpCheck = CallInst::Create(CheckLongjmp, Args, "", BB);
Instruction *LongjmpResult = CallInst::Create(GetLongjmpResult, Args, "", BB);
SwitchInst *SI = SwitchInst::Create(LongjmpCheck, Tail, 2, BB);
// -1 means no longjmp happened, continue normally (will hit the default switch case). 0 means a longjmp that is not ours to handle, needs a rethrow. Otherwise
// the index mean is the same as the index in P+1 (to avoid 0).
for (unsigned i = 0; i < P.size(); i++) {
SI->addCase(cast<ConstantInt>(ConstantInt::get(i32, i+1)), P[i]->getParent());
P[i]->addIncoming(LongjmpResult, BB);
}
TI->eraseFromParent(); // new terminator is now the switch
// we are splitting the block here, and must continue to find other calls in the block - which is now split. so continue
// to traverse in the Tail
BB = Tail;
Iter
|