1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
|
//===- ExpandVarArgs.cpp - Expand out variable argument function calls-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass expands out all use of variable argument functions.
//
// This pass replaces a varargs function call with a function call in
// which a pointer to the variable arguments is passed explicitly.
// The caller explicitly allocates space for the variable arguments on
// the stack using "alloca".
//
// Alignment:
//
// This pass does not add any alignment padding between the arguments
// that are copied onto the stack. We assume that the only argument
// types that need to be handled are 32-bit and 64-bit -- i32, i64,
// pointers and double:
//
// * We won't see i1, i8, i16 and float as varargs arguments because
// the C standard requires the compiler to promote these to the
// types "int" and "double".
//
// * We won't see va_arg instructions of struct type because Clang
// does not yet support them in PNaCl mode. See
// https://code.google.com/p/nativeclient/issues/detail?id=2381
//
// If such arguments do appear in the input, this pass will generate
// correct, working code, but this code might be inefficient due to
// using unaligned memory accesses.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/NaCl.h"
using namespace llvm;
namespace {
// This is a ModulePass because the pass recreates functions in
// order to change their argument lists.
class ExpandVarArgs : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
ExpandVarArgs() : ModulePass(ID) {
initializeExpandVarArgsPass(*PassRegistry::getPassRegistry());
}
virtual bool runOnModule(Module &M);
};
}
// Out-of-class definition of the pass identifier declared in ExpandVarArgs.
char ExpandVarArgs::ID = 0;
// Registers the pass under the argument string "expand-varargs" with the
// description given below.
// NOTE(review): the two trailing `false` arguments are presumably
// INITIALIZE_PASS's "CFG only" and "is analysis" flags -- confirm against
// the macro definition in llvm/PassSupport.h.
INITIALIZE_PASS(ExpandVarArgs, "expand-varargs",
"Expand out variable argument function definitions and calls",
false, false)
// Returns true if Name is exactly one of the Emscripten functions listed
// below. The callers in this file leave such functions, and calls to them,
// unexpanded.
static bool isEmscriptenJSArgsFunc(StringRef Name) {
  if (Name.equals("emscripten_asm_const_int"))
    return true;
  if (Name.equals("emscripten_asm_const_double"))
    return true;
  if (Name.equals("emscripten_landingpad"))
    return true;
  return Name.equals("emscripten_resume");
}
// Replaces the variadic function Func with a non-variadic one that takes
// one extra trailing i8* parameter, named "varargs", carrying the variable
// arguments. Func is erased; every llvm.va_start in the new function is
// turned into a store of that parameter into the va_list.
static void ExpandVarArgFunc(Function *Func) {
  FunctionType *OldTy = Func->getFunctionType();
  Type *BytePtrTy = Type::getInt8PtrTy(Func->getContext());

  // New signature: the fixed parameters plus the i8* varargs pointer, and
  // no longer variadic.
  SmallVector<Type *, 8> NewParams(OldTy->param_begin(), OldTy->param_end());
  NewParams.push_back(BytePtrTy);
  FunctionType *NewTy =
      FunctionType::get(OldTy->getReturnType(), NewParams, false);
  // NOTE(review): RecreateFunction() is defined outside this file; it is
  // presumably what moves the body and remaining uses over -- confirm.
  Function *NewFunc = RecreateFunction(Func, NewTy);

  // Mark the appended parameter "noalias". Attribute indexes for
  // parameters are 1-based, hence the "+ 1".
  const unsigned VarArgsAttrIndex = OldTy->getNumParams() + 1;
  NewFunc->setAttributes(Func->getAttributes().addAttribute(
      Func->getContext(), VarArgsAttrIndex, Attribute::NoAlias));

  // Redirect uses of each old argument to its counterpart in the new
  // function, and carry the argument names across.
  Function::arg_iterator OldArg = Func->arg_begin();
  Function::arg_iterator OldArgEnd = Func->arg_end();
  Function::arg_iterator Replacement = NewFunc->arg_begin();
  while (OldArg != OldArgEnd) {
    OldArg->replaceAllUsesWith(Replacement);
    Replacement->takeName(OldArg);
    ++OldArg;
    ++Replacement;
  }
  Func->eraseFromParent();

  // The varargs pointer is the last argument of the new function.
  Value *VarArgsArg = --NewFunc->arg_end();
  VarArgsArg->setName("varargs");

  // Rewrite each llvm.va_start as "store the varargs pointer into the
  // va_list". The iterator is advanced before the instruction is touched so
  // that erasing it does not invalidate the walk.
  for (Function::iterator Block = NewFunc->begin(), BlockEnd = NewFunc->end();
       Block != BlockEnd; ++Block) {
    BasicBlock::iterator Pos = Block->begin();
    BasicBlock::iterator PosEnd = Block->end();
    while (Pos != PosEnd) {
      Instruction *Current = Pos++;
      VAStartInst *Start = dyn_cast<VAStartInst>(Current);
      if (!Start)
        continue;

      BitCastInst *ListCast = new BitCastInst(
          Start->getArgList(), BytePtrTy->getPointerTo(), "arglist", Start);
      Value *ListSlot = CopyDebug(ListCast, Start);
      CopyDebug(new StoreInst(VarArgsArg, ListSlot, Start), Start);
      Start->eraseFromParent();
    }
  }
}
// Lowers a va_arg instruction into explicit memory operations. The va_list
// is treated as holding a single pointer to the next unread argument: that
// pointer is loaded, the argument is loaded through it, and the pointer
// advanced by one element of the argument's type is stored back.
static void ExpandVAArgInst(VAArgInst *Inst) {
  // View the va_list as T** where T is the type being read. We assume
  // that no realignment of the current-argument pointer is required.
  BitCastInst *ListCast = new BitCastInst(
      Inst->getPointerOperand(),
      Inst->getType()->getPointerTo()->getPointerTo(), "arglist", Inst);
  Value *ArgList = CopyDebug(ListCast, Inst);

  LoadInst *CursorLoad = new LoadInst(ArgList, "arglist_current", Inst);
  Value *Cursor = CopyDebug(CursorLoad, Inst);

  LoadInst *ArgLoad = new LoadInst(Cursor, "va_arg", Inst);
  Value *Loaded = CopyDebug(ArgLoad, Inst);
  Loaded->takeName(Inst);

  // Step the cursor past the argument just read and write it back into
  // the va_list.
  Value *One = ConstantInt::get(Inst->getContext(), APInt(32, 1));
  SmallVector<Value *, 1> Step;
  Step.push_back(One);
  GetElementPtrInst *Advance =
      GetElementPtrInst::Create(Cursor, Step, "arglist_next", Inst);
  Value *Next = CopyDebug(Advance, Inst);
  CopyDebug(new StoreInst(Next, ArgList, Inst), Inst);

  Inst->replaceAllUsesWith(Loaded);
  Inst->eraseFromParent();
}
// Lowers llvm.va_copy into a load of one pointer from the source va_list
// followed by a store of that pointer into the destination va_list.
static void ExpandVACopyInst(VACopyInst *Inst) {
  // Both va_lists are viewed as i8**. A va_list may have more space
  // reserved, but only a single pointer needs to be copied.
  Type *SlotTy = Type::getInt8PtrTy(Inst->getContext())->getPointerTo();

  BitCastInst *SrcCast =
      new BitCastInst(Inst->getSrc(), SlotTy, "vacopy_src", Inst);
  Value *Src = CopyDebug(SrcCast, Inst);

  BitCastInst *DestCast =
      new BitCastInst(Inst->getDest(), SlotTy, "vacopy_dest", Inst);
  Value *Dest = CopyDebug(DestCast, Inst);

  LoadInst *CursorLoad = new LoadInst(Src, "vacopy_currentptr", Inst);
  Value *Cursor = CopyDebug(CursorLoad, Inst);
  CopyDebug(new StoreInst(Cursor, Dest, Inst), Inst);

  Inst->eraseFromParent();
}
// Emits a call to the intrinsic identified by `id` with the operands
// (Size, Ptr), using InsertPt as the insertion point. The intrinsic's
// declaration is looked up in the module that contains InsertPt. Used in
// this file for llvm.lifetime.start and llvm.lifetime.end.
static void LifetimeDecl(Intrinsic::ID id, Value *Ptr, Value *Size,
                         Instruction *InsertPt) {
  // Walk up from the instruction: basic block -> function -> module.
  BasicBlock *Block = InsertPt->getParent();
  Function *Owner = Block->getParent();
  Module *M = Owner->getParent();

  Value *Func = Intrinsic::getDeclaration(M, id);
  // Operand order is size first, then pointer.
  Value *Operands[] = { Size, Ptr };
  CallInst::Create(Func, Operands, "", InsertPt);
}
// CopyCall() is overloaded on the type of the original instruction so that
// the ExpandVarArgCall() template can use a single spelling for both calls
// and invokes. This is the CallInst overload: it creates an unnamed call to
// Callee with Args, passing Original as the insertion point.
static CallInst *CopyCall(CallInst *Original, Value *Callee,
                          ArrayRef<Value*> Args) {
  CallInst *Replacement = CallInst::Create(Callee, Args, "", Original);
  return Replacement;
}
// InvokeInst overload of CopyCall(): creates an unnamed invoke of Callee
// with Args that reuses Original's normal and unwind destinations, passing
// Original as the insertion point.
static InvokeInst *CopyCall(InvokeInst *Original, Value *Callee,
                            ArrayRef<Value*> Args) {
  BasicBlock *NormalDest = Original->getNormalDest();
  BasicBlock *UnwindDest = Original->getUnwindDest();
  return InvokeInst::Create(Callee, NormalDest, UnwindDest, Args, "",
                            Original);
}
// ExpandVarArgCall() rewrites one call site whose callee type is variadic
// so that the variable arguments are copied into a stack buffer and a
// pointer to that buffer is passed as one extra trailing argument.
//
// InstType: CallInst or InvokeInst; selects the matching CopyCall() overload.
// Call:     the call site to rewrite. When rewritten, its uses are redirected
//           to the new call and it is erased.
// DL:       data layout used to size the buffer and any "byval" copies.
//
// Returns true if the call was rewritten (i.e. the module was modified), and
// false if it was left untouched: either the callee type is not variadic, or
// the callee is a Function for which isEmscriptenJSArgsFunc() returns true.
template <class InstType>
static bool ExpandVarArgCall(InstType *Call, DataLayout *DL) {
// The called value is a pointer to a function; look through the pointer to
// get the function type.
FunctionType *FuncType = cast<FunctionType>(
Call->getCalledValue()->getType()->getPointerElementType());
if (!FuncType->isFunctionVarArg())
return false;
// EMSCRIPTEN: use js varargs for special intrinsics
const Value *CV = Call->getCalledValue();
if (isa<Function>(CV) && isEmscriptenJSArgsFunc(CV->getName())) {
return false;
}
LLVMContext *Context = &Call->getContext();
// Attributes for the new call: function attributes, return attributes, then
// one entry per fixed parameter (added in the loop below). Attributes on
// the variable arguments are not carried over.
SmallVector<AttributeSet, 8> Attrs;
Attrs.push_back(Call->getAttributes().getFnAttributes());
Attrs.push_back(Call->getAttributes().getRetAttributes());
// Split argument list into fixed and variable arguments.
SmallVector<Value *, 8> FixedArgs;
SmallVector<Value *, 8> VarArgs;
SmallVector<Type *, 8> VarArgsTypes;
for (unsigned I = 0; I < FuncType->getNumParams(); ++I) {
FixedArgs.push_back(Call->getArgOperand(I));
// AttributeSets use 1-based indexing.
Attrs.push_back(Call->getAttributes().getParamAttributes(I + 1));
}
// Everything past the declared parameters is a variable argument. Record
// each value together with the type of the buffer slot it will occupy.
for (unsigned I = FuncType->getNumParams();
I < Call->getNumArgOperands(); ++I) {
Value *ArgVal = Call->getArgOperand(I);
VarArgs.push_back(ArgVal);
if (Call->getAttributes().hasAttribute(I + 1, Attribute::ByVal)) {
// For "byval" arguments we must dereference the pointer.
VarArgsTypes.push_back(ArgVal->getType()->getPointerElementType());
} else {
VarArgsTypes.push_back(ArgVal->getType());
}
}
if (VarArgsTypes.size() == 0) {
// Some buggy code (e.g. 176.gcc in Spec2k) uses va_arg on an
// empty argument list, which gives undefined behaviour in C. To
// work around such programs, we create a dummy varargs buffer on
// the stack even though there are no arguments to put in it.
// This allows va_arg to read an undefined value from the stack
// rather than crashing by reading from an uninitialized pointer.
// An alternative would be to pass a null pointer to catch the
// invalid use of va_arg.
VarArgsTypes.push_back(Type::getInt32Ty(*Context));
}
// Create struct type for packing variable arguments into. We
// create this as packed for now and assume that no alignment
// padding is desired.
StructType *VarArgsTy = StructType::get(*Context, VarArgsTypes, true);
// Allocate space for the variable argument buffer. Do this at the
// start of the function so that we don't leak space if the function
// is called in a loop.
Function *Func = Call->getParent()->getParent();
AllocaInst *Buf = new AllocaInst(VarArgsTy, "vararg_buffer");
Buf->setAlignment(8); // XXX EMSCRIPTEN: Align for 8-byte aligned doubles.
Func->getEntryBlock().getInstList().push_front(Buf);
// Call llvm.lifetime.start/end intrinsics to indicate that Buf is
// only used for the duration of the function call, so that the
// stack space can be reused elsewhere.
// The i8* view of the buffer is placed right after the alloca in the entry
// block, so the same value can be used by the lifetime markers emitted at
// the call site and, for invokes, in the successor blocks.
Type *I8Ptr = Type::getInt8Ty(*Context)->getPointerTo();
Instruction *BufPtr = new BitCastInst(Buf, I8Ptr, "vararg_lifetime_bitcast");
BufPtr->insertAfter(Buf);
Value *BufSize = ConstantInt::get(*Context,
APInt(64, DL->getTypeAllocSize(VarArgsTy)));
LifetimeDecl(Intrinsic::lifetime_start, BufPtr, BufSize, Call);
// Copy variable arguments into buffer.
// Index is both the field number within the packed struct and the offset of
// the argument within the variable part of the call's argument list.
int Index = 0;
for (SmallVector<Value *, 8>::iterator Iter = VarArgs.begin();
Iter != VarArgs.end();
++Iter, ++Index) {
// Address of field number Index of the buffer: GEP indexes {0, Index}.
SmallVector<Value *, 2> Indexes;
Indexes.push_back(ConstantInt::get(*Context, APInt(32, 0)));
Indexes.push_back(ConstantInt::get(*Context, APInt(32, Index)));
Value *Ptr = CopyDebug(GetElementPtrInst::Create(
Buf, Indexes, "vararg_ptr", Call), Call);
// Same "byval" test as when the slot types were collected above, using the
// 1-based attribute index of this variable argument.
if (Call->getAttributes().hasAttribute(
FuncType->getNumParams() + Index + 1, Attribute::ByVal)) {
// "byval" argument: copy the pointed-to object into the buffer slot.
IRBuilder<> Builder(Call);
Builder.CreateMemCpy(
Ptr, *Iter,
DL->getTypeAllocSize((*Iter)->getType()->getPointerElementType()),
/* Align= */ 1);
} else {
// Ordinary argument: store the value directly into its slot.
// NOTE(review): the buffer struct is packed, so a slot's offset is a
// multiple of 4 only if the sizes of all preceding arguments are. The
// fixed alignment of 4 below appears to rely on the "only 32-bit and
// 64-bit arguments" assumption stated in the file header -- confirm.
StoreInst *S = new StoreInst(*Iter, Ptr, Call);
CopyDebug(S, Call);
S->setAlignment(4); // EMSCRIPTEN: pnacl stack is only 4-byte aligned
}
}
// Cast function to new type to add our extra pointer argument.
SmallVector<Type *, 8> ArgTypes(FuncType->param_begin(),
FuncType->param_end());
ArgTypes.push_back(VarArgsTy->getPointerTo());
FunctionType *NFTy = FunctionType::get(FuncType->getReturnType(),
ArgTypes, false);
/// XXX EMSCRIPTEN: Handle Constants as well as Instructions, since we
/// don't run the ConstantExpr lowering pass.
Value *CastFunc;
if (Constant *C = dyn_cast<Constant>(Call->getCalledValue()))
CastFunc = ConstantExpr::getBitCast(C, NFTy->getPointerTo());
else
CastFunc = CopyDebug(new BitCastInst(Call->getCalledValue(), NFTy->getPointerTo(),
"vararg_func", Call), Call);
// Create the converted function call.
// The buffer pointer becomes the final argument after the fixed ones.
FixedArgs.push_back(Buf);
InstType *NewCall = CopyCall(Call, CastFunc, FixedArgs);
CopyDebug(NewCall, Call);
NewCall->setAttributes(AttributeSet::get(Call->getContext(), Attrs));
NewCall->takeName(Call);
// End the buffer's lifetime once the call is done. For a plain call the
// marker is emitted at the old call's position (after the new call, which
// was created with the old call as its insertion point). An invoke is
// followed by no instruction in its own block, so a marker is placed at the
// first insertion point of both the normal and the unwind destination.
if (isa<CallInst>(Call)) {
LifetimeDecl(Intrinsic::lifetime_end, BufPtr, BufSize, Call);
} else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Call)) {
LifetimeDecl(Intrinsic::lifetime_end, BufPtr, BufSize,
Invoke->getNormalDest()->getFirstInsertionPt());
LifetimeDecl(Intrinsic::lifetime_end, BufPtr, BufSize,
Invoke->getUnwindDest()->getFirstInsertionPt());
}
// Redirect users of the old call's result to the new call, then drop the
// old call.
Call->replaceAllUsesWith(NewCall);
Call->eraseFromParent();
return true;
}
// Runs the expansion over every function in M. Within each function,
// va_arg, va_end and va_copy are lowered and variadic call sites are
// rewritten; afterwards the function itself is expanded if it is variadic.
// Returns true if anything in the module was changed.
bool ExpandVarArgs::runOnModule(Module &M) {
  DataLayout DL(&M);
  bool Changed = false;

  Module::iterator FuncPos = M.begin();
  Module::iterator FuncEnd = M.end();
  while (FuncPos != FuncEnd) {
    // Advance first: expanding the function below erases it.
    Function *Func = FuncPos++;

    for (Function::iterator Block = Func->begin(), BlockEnd = Func->end();
         Block != BlockEnd; ++Block) {
      BasicBlock::iterator InstPos = Block->begin();
      BasicBlock::iterator InstEnd = Block->end();
      while (InstPos != InstEnd) {
        // Advance first: each case below may erase the instruction.
        Instruction *Inst = InstPos++;

        if (VAArgInst *VAArg = dyn_cast<VAArgInst>(Inst)) {
          Changed = true;
          ExpandVAArgInst(VAArg);
          continue;
        }
        if (isa<VAEndInst>(Inst)) {
          // va_end() is a no-op in this implementation.
          Changed = true;
          Inst->eraseFromParent();
          continue;
        }
        if (VACopyInst *VACopy = dyn_cast<VACopyInst>(Inst)) {
          Changed = true;
          ExpandVACopyInst(VACopy);
          continue;
        }
        // The va_end/va_copy checks above come before this general call
        // check, matching the order in which the cases must be tried.
        if (CallInst *Call = dyn_cast<CallInst>(Inst)) {
          if (ExpandVarArgCall(Call, &DL))
            Changed = true;
          continue;
        }
        if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
          if (ExpandVarArgCall(Invoke, &DL))
            Changed = true;
        }
      }
    }

    // EMSCRIPTEN: use js varargs for special intrinsics
    if (!Func->isVarArg() || isEmscriptenJSArgsFunc(Func->getName()))
      continue;
    Changed = true;
    ExpandVarArgFunc(Func);
  }
  return Changed;
}
// Factory for the pass: returns a newly allocated ExpandVarArgs instance.
ModulePass *llvm::createExpandVarArgsPass() {
  ExpandVarArgs *Pass = new ExpandVarArgs();
  return Pass;
}
|