diff options
author | Mark Seaborn <mseaborn@chromium.org> | 2012-11-29 18:42:19 -0800 |
---|---|---|
committer | Mark Seaborn <mseaborn@chromium.org> | 2012-11-29 18:42:19 -0800 |
commit | ffb0eedef4f034996ce59aac3176482617a8044c (patch) | |
tree | 03195b67f4e4770e961a4487fcdc5ac9e52bbb17 | |
parent | 7a51b9c26dfc7fb9df8047c553227b47d64ba5c6 (diff) |
PNaCl: Add ExpandTls pass for expanding out static TLS variables
This replaces each reference to a TLS variable "foo" with the LLVM IR
equivalent of the expression:
((struct tls_template *) __nacl_read_tp())->foo
This pass fills out the global variables __tls_template_start,
__tls_template_end etc. which are used by src/untrusted/nacl/tls.c.
These are the symbols that are otherwise defined by a binutils linker
script.
In order to handle the case of TLS variables that occur inside
ConstantExprs, we have a helper pass, ExpandTlsConstantExpr.
BUG=http://code.google.com/p/nativeclient/issues/detail?id=2837
TEST=test/Transforms/NaCl/expand-tls*.ll
Review URL: https://chromiumcodereview.appspot.com/10896042
-rw-r--r-- | include/llvm/InitializePasses.h | 2 | ||||
-rw-r--r-- | include/llvm/Transforms/NaCl.h | 2 | ||||
-rw-r--r-- | lib/Transforms/NaCl/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/Transforms/NaCl/ExpandTls.cpp | 351 | ||||
-rw-r--r-- | lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp | 110 | ||||
-rw-r--r-- | test/Transforms/NaCl/expand-tls-aligned.ll | 42 | ||||
-rw-r--r-- | test/Transforms/NaCl/expand-tls-bss.ll | 17 | ||||
-rw-r--r-- | test/Transforms/NaCl/expand-tls-constexpr.ll | 116 | ||||
-rw-r--r-- | test/Transforms/NaCl/expand-tls-constexpr2.ll | 12 | ||||
-rw-r--r-- | test/Transforms/NaCl/expand-tls-phi.ll | 23 | ||||
-rw-r--r-- | test/Transforms/NaCl/expand-tls.ll | 85 | ||||
-rw-r--r-- | test/Transforms/NaCl/lit.local.cfg | 3 | ||||
-rw-r--r-- | tools/opt/opt.cpp | 2 |
13 files changed, 767 insertions, 0 deletions
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index a6b7d31817..c794308f73 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -266,6 +266,8 @@ void initializeLoopVectorizePass(PassRegistry&); void initializeBBVectorizePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); void initializeExpandCtorsPass(PassRegistry&); // @LOCALMOD +void initializeExpandTlsPass(PassRegistry&); // @LOCALMOD +void initializeExpandTlsConstantExprPass(PassRegistry&); // @LOCALMOD void initializeNaClCcRewritePass(PassRegistry&); // @LOCALMOD } diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h index fe29463a8b..79c9b9fe79 100644 --- a/include/llvm/Transforms/NaCl.h +++ b/include/llvm/Transforms/NaCl.h @@ -15,6 +15,8 @@ namespace llvm { class ModulePass; ModulePass *createExpandCtorsPass(); +ModulePass *createExpandTlsPass(); +ModulePass *createExpandTlsConstantExprPass(); } diff --git a/lib/Transforms/NaCl/CMakeLists.txt b/lib/Transforms/NaCl/CMakeLists.txt index d634ad9655..5e24cc7e28 100644 --- a/lib/Transforms/NaCl/CMakeLists.txt +++ b/lib/Transforms/NaCl/CMakeLists.txt @@ -1,5 +1,7 @@ add_llvm_library(LLVMTransformsNaCl ExpandCtors.cpp + ExpandTls.cpp + ExpandTlsConstantExpr.cpp ) add_dependencies(LLVMTransformsNaCl intrinsics_gen) diff --git a/lib/Transforms/NaCl/ExpandTls.cpp b/lib/Transforms/NaCl/ExpandTls.cpp new file mode 100644 index 0000000000..8ce439c018 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandTls.cpp @@ -0,0 +1,351 @@ +//===- ExpandTls.cpp - Convert TLS variables to a concrete layout----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out uses of thread-local (TLS) variables into +// more primitive operations. 
+// +// A reference to the address of a TLS variable is expanded into code +// which gets the current thread's thread pointer using +// @llvm.nacl.read.tp() and adds a fixed offset. +// +// This pass allocates the offsets (relative to the thread pointer) +// that will be used for TLS variables. It sets up the global +// variables __tls_template_start, __tls_template_end etc. to contain +// a template for initializing TLS variables' values for each thread. +// This is a task normally performed by the linker in ELF systems. +// +//===----------------------------------------------------------------------===// + +#include <vector> + +#include "llvm/Constants.h" +#include "llvm/DataLayout.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + struct VarInfo { + GlobalVariable *TlsVar; + bool IsBss; // Whether variable is in zero-initialized part of template + int TemplateIndex; + }; + + class PassState { + public: + PassState(Module *M): M(M), DL(M), Offset(0), Alignment(1) {} + + Module *M; + DataLayout DL; + uint64_t Offset; + // 'Alignment' is the maximum variable alignment seen so far, in + // bytes. After visiting all TLS variables, this is the overall + // alignment required for the TLS template. 
+ uint32_t Alignment; + }; + + class ExpandTls : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandTls() : ModulePass(ID) { + initializeExpandTlsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandTls::ID = 0; +INITIALIZE_PASS(ExpandTls, "nacl-expand-tls", + "Expand out TLS variables and fix TLS variable layout", + false, false) + +static void setGlobalVariableValue(Module &M, const char *Name, + Constant *Value) { + GlobalVariable *Var = M.getNamedGlobal(Name); + if (!Var) { + // This warning can happen in a program that does not use a libc + // and does not initialize TLS variables. Such a program might be + // linked with "-nostdlib". + errs() << "Warning: Variable " << Name << " not referenced\n"; + } else { + if (Var->hasInitializer()) { + report_fatal_error(std::string("Variable ") + Name + + " already has an initializer"); + } + Var->replaceAllUsesWith(ConstantExpr::getBitCast(Value, Var->getType())); + Var->eraseFromParent(); + } +} + +// Insert alignment padding into the TLS template. 
+static void padToAlignment(PassState *State, + std::vector<Type*> *FieldTypes, + std::vector<Constant*> *FieldValues, + unsigned Alignment) { + if ((State->Offset & (Alignment - 1)) != 0) { + unsigned PadSize = Alignment - (State->Offset & (Alignment - 1)); + Type *i8 = Type::getInt8Ty(State->M->getContext()); + Type *PadType = ArrayType::get(i8, PadSize); + FieldTypes->push_back(PadType); + if (FieldValues) + FieldValues->push_back(Constant::getNullValue(PadType)); + State->Offset += PadSize; + } + if (State->Alignment < Alignment) { + State->Alignment = Alignment; + } +} + +static void addVarToTlsTemplate(PassState *State, + std::vector<Type*> *FieldTypes, + std::vector<Constant*> *FieldValues, + GlobalVariable *TlsVar) { + unsigned Alignment = State->DL.getPreferredAlignment(TlsVar); + padToAlignment(State, FieldTypes, FieldValues, Alignment); + + FieldTypes->push_back(TlsVar->getType()->getElementType()); + if (FieldValues) + FieldValues->push_back(TlsVar->getInitializer()); + State->Offset += + State->DL.getTypeAllocSize(TlsVar->getType()->getElementType()); +} + +static PointerType *buildTlsTemplate(Module &M, std::vector<VarInfo> *TlsVars) { + std::vector<Type*> FieldBssTypes; + std::vector<Type*> FieldInitTypes; + std::vector<Constant*> FieldInitValues; + PassState State(&M); + + for (Module::global_iterator GV = M.global_begin(); + GV != M.global_end(); + ++GV) { + if (GV->isThreadLocal()) { + if (!GV->hasInitializer()) { + // Since this is a whole-program transformation, "extern" TLS + // variables are not allowed at this point. 
+ report_fatal_error(std::string("TLS variable without an initializer: ") + + GV->getName()); + } + if (!GV->getInitializer()->isNullValue()) { + addVarToTlsTemplate(&State, &FieldInitTypes, + &FieldInitValues, GV); + VarInfo Info; + Info.TlsVar = GV; + Info.IsBss = false; + Info.TemplateIndex = FieldInitTypes.size() - 1; + TlsVars->push_back(Info); + } + } + } + // Handle zero-initialized TLS variables in a second pass, because + // these should follow non-zero-initialized TLS variables. + for (Module::global_iterator GV = M.global_begin(); + GV != M.global_end(); + ++GV) { + if (GV->isThreadLocal() && GV->getInitializer()->isNullValue()) { + addVarToTlsTemplate(&State, &FieldBssTypes, NULL, GV); + VarInfo Info; + Info.TlsVar = GV; + Info.IsBss = true; + Info.TemplateIndex = FieldBssTypes.size() - 1; + TlsVars->push_back(Info); + } + } + // Add final alignment padding so that + // (struct tls_struct *) __nacl_read_tp() - 1 + // gives the correct, aligned start of the TLS variables given the + // x86-style layout we are using. This requires some more bytes to + // be memset() to zero at runtime. This wastage doesn't seem + // important given that we're not trying to optimize packing by + // reordering to put similarly-aligned variables together. + padToAlignment(&State, &FieldBssTypes, NULL, State.Alignment); + + // We create the TLS template structs as "packed" because we insert + // alignment padding ourselves, and LLVM's implicit insertion of + // padding would interfere with ours. tls_bss_template can start at + // a non-aligned address immediately following the last field in + // tls_init_template. 
+ StructType *InitTemplateType = + StructType::create(M.getContext(), "tls_init_template"); + InitTemplateType->setBody(FieldInitTypes, /*isPacked=*/true); + StructType *BssTemplateType = + StructType::create(M.getContext(), "tls_bss_template"); + BssTemplateType->setBody(FieldBssTypes, /*isPacked=*/true); + + StructType *TemplateType = StructType::create(M.getContext(), "tls_struct"); + SmallVector<Type*, 2> TemplateTopFields; + TemplateTopFields.push_back(InitTemplateType); + TemplateTopFields.push_back(BssTemplateType); + TemplateType->setBody(TemplateTopFields, /*isPacked=*/true); + PointerType *TemplatePtrType = PointerType::get(TemplateType, 0); + + // We define the following symbols, which are the same as those + // defined by NaCl's original customized binutils linker scripts: + // __tls_template_start + // __tls_template_tdata_end + // __tls_template_end + // We also define __tls_template_alignment, which was not defined by + // the original linker scripts. + + const char *StartSymbol = "__tls_template_start"; + Constant *TemplateData = ConstantStruct::get(InitTemplateType, + FieldInitValues); + GlobalVariable *TemplateDataVar = + new GlobalVariable(M, InitTemplateType, /*isConstant=*/true, + GlobalValue::InternalLinkage, TemplateData); + setGlobalVariableValue(M, StartSymbol, TemplateDataVar); + TemplateDataVar->setName(StartSymbol); + + Constant *TdataEnd = ConstantExpr::getGetElementPtr( + TemplateDataVar, + ConstantInt::get(M.getContext(), APInt(32, 1))); + setGlobalVariableValue(M, "__tls_template_tdata_end", TdataEnd); + + Constant *TotalEnd = ConstantExpr::getGetElementPtr( + ConstantExpr::getBitCast(TemplateDataVar, TemplatePtrType), + ConstantInt::get(M.getContext(), APInt(32, 1))); + setGlobalVariableValue(M, "__tls_template_end", TotalEnd); + + const char *AlignmentSymbol = "__tls_template_alignment"; + Type *i32 = Type::getInt32Ty(M.getContext()); + GlobalVariable *AlignmentVar = new GlobalVariable( + M, i32, /*isConstant=*/true, + 
GlobalValue::InternalLinkage, + ConstantInt::get(M.getContext(), APInt(32, State.Alignment))); + setGlobalVariableValue(M, AlignmentSymbol, AlignmentVar); + AlignmentVar->setName(AlignmentSymbol); + + return TemplatePtrType; +} + +static void rewriteTlsVars(Module &M, std::vector<VarInfo> *TlsVars, + PointerType *TemplatePtrType) { + // Set up the intrinsic that reads the thread pointer. + Type *i8 = Type::getInt8Ty(M.getContext()); + FunctionType *ReadTpType = FunctionType::get(PointerType::get(i8, 0), + /*isVarArg=*/false); + AttrBuilder B; + B.addAttribute(Attributes::ReadOnly); + B.addAttribute(Attributes::NoUnwind); + AttrListPtr ReadTpAttrs = AttrListPtr().addAttr( + M.getContext(), AttrListPtr::FunctionIndex, + Attributes::get(M.getContext(), B)); + Constant *ReadTpFunc = M.getOrInsertTargetIntrinsic("llvm.nacl.read.tp", + ReadTpType, + ReadTpAttrs); + + for (std::vector<VarInfo>::iterator VarInfo = TlsVars->begin(); + VarInfo != TlsVars->end(); + ++VarInfo) { + GlobalVariable *Var = VarInfo->TlsVar; + while (!Var->use_empty()) { + Instruction *U = cast<Instruction>(*Var->use_begin()); + Instruction *InsertPt = U; + if (PHINode *PN = dyn_cast<PHINode>(InsertPt)) { + // We cannot insert instructions before a PHI node, so insert + // before the incoming block's terminator. Note that if the + // terminator is conditional, this could be suboptimal, + // because we might be calling ReadTpFunc unnecessarily. + InsertPt = PN->getIncomingBlock(Var->use_begin())->getTerminator(); + } + Value *RawThreadPtr = CallInst::Create(ReadTpFunc, "tls_raw", InsertPt); + Value *TypedThreadPtr = new BitCastInst(RawThreadPtr, TemplatePtrType, + "tls_struct", InsertPt); + SmallVector<Value*, 3> Indexes; + // We use -1 because we use the x86-style TLS layout in which + // the TLS data is stored at addresses below the thread pointer. 
+ // This is largely because a check in nacl_irt_thread_create() + // in irt/irt_thread.c requires the thread pointer to be a + // self-pointer on x86-32. + // TODO(mseaborn): I intend to remove that check because it is + // non-portable. In the mean time, we want PNaCl pexes to work + // in older Chromium releases when translated to nexes. + Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, -1))); + Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, VarInfo->IsBss ? 1 : 0))); + Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, VarInfo->TemplateIndex))); + Value *TlsField = GetElementPtrInst::Create(TypedThreadPtr, Indexes, + "field", InsertPt); + U->replaceUsesOfWith(Var, TlsField); + } + VarInfo->TlsVar->eraseFromParent(); + } +} + +// Provide fixed definitions for PNaCl's TLS layout intrinsics. We +// adopt the x86-style layout: ExpandTls will output a program that +// uses the x86-style layout wherever it runs. This overrides any +// architecture-specific definitions of the intrinsics that the LLVM +// backend might provide. +static void defineTlsLayoutIntrinsics(Module &M) { + Type *i32 = Type::getInt32Ty(M.getContext()); + SmallVector<Type*, 1> ArgTypes; + ArgTypes.push_back(i32); + FunctionType *FuncType = FunctionType::get(i32, ArgTypes, /*isVarArg=*/false); + Function *NewFunc; + BasicBlock *BB; + + // Define the intrinsic as follows: + // uint32_t __nacl_tp_tdb_offset(uint32_t tdb_size) { + // return 0; + // } + // This means the thread pointer points to the TDB. 
+ NewFunc = Function::Create(FuncType, GlobalValue::InternalLinkage, + "nacl_tp_tdb_offset", &M); + BB = BasicBlock::Create(M.getContext(), "entry", NewFunc); + ReturnInst::Create(M.getContext(), + ConstantInt::get(M.getContext(), APInt(32, 0)), BB); + if (Function *Intrinsic = M.getFunction("llvm.nacl.tp.tdb.offset")) { + Intrinsic->replaceAllUsesWith(NewFunc); + Intrinsic->eraseFromParent(); + } + + // Define the intrinsic as follows: + // uint32_t __nacl_tp_tls_offset(uint32_t tls_size) { + // return -tls_size; + // } + // This means the TLS variables are stored below the thread pointer. + NewFunc = Function::Create(FuncType, GlobalValue::InternalLinkage, + "nacl_tp_tls_offset", &M); + BB = BasicBlock::Create(M.getContext(), "entry", NewFunc); + Value *Arg = NewFunc->arg_begin(); + Arg->setName("size"); + Value *Result = BinaryOperator::CreateNeg(Arg, "result", BB); + ReturnInst::Create(M.getContext(), Result, BB); + if (Function *Intrinsic = M.getFunction("llvm.nacl.tp.tls.offset")) { + Intrinsic->replaceAllUsesWith(NewFunc); + Intrinsic->eraseFromParent(); + } +} + +bool ExpandTls::runOnModule(Module &M) { + ModulePass *Pass = createExpandTlsConstantExprPass(); + Pass->runOnModule(M); + delete Pass; + + std::vector<VarInfo> TlsVars; + PointerType *TemplatePtrType = buildTlsTemplate(M, &TlsVars); + rewriteTlsVars(M, &TlsVars, TemplatePtrType); + + defineTlsLayoutIntrinsics(M); + + return true; +} + +ModulePass *llvm::createExpandTlsPass() { + return new ExpandTls(); +} diff --git a/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp b/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp new file mode 100644 index 0000000000..90e007604f --- /dev/null +++ b/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp @@ -0,0 +1,110 @@ +//===- ExpandTlsConstantExpr.cpp - Convert ConstantExprs to Instructions---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass is a helper used by the ExpandTls pass. +// +// LLVM treats the address of a TLS variable as a ConstantExpr. This +// is arguably a bug because the address of a TLS variable is *not* a +// constant: it varies between threads. +// +// See http://llvm.org/bugs/show_bug.cgi?id=14353 +// +// This is also a problem for the ExpandTls pass, which wants to use +// replaceUsesOfWith() to replace each TLS variable with an +// Instruction sequence that calls @llvm.nacl.read.tp(). This doesn't +// work if the TLS variable is used inside other ConstantExprs, +// because ConstantExprs are interned and are not associated with any +// function, whereas each Instruction must be part of a function. +// +// To fix that problem, this pass converts ConstantExprs that +// reference TLS variables into Instructions. +// +// For example, this use of a 'ptrtoint' ConstantExpr: +// +// ret i32 ptrtoint (i32* @tls_var to i32) +// +// is converted into this 'ptrtoint' Instruction: +// +// %expanded = ptrtoint i32* @tls_var to i32 +// ret i32 %expanded +// +//===----------------------------------------------------------------------===// + +#include <vector> + +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class ExpandTlsConstantExpr : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandTlsConstantExpr() : ModulePass(ID) { + initializeExpandTlsConstantExprPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandTlsConstantExpr::ID = 0; +INITIALIZE_PASS(ExpandTlsConstantExpr, "nacl-expand-tls-constant-expr", + "Eliminate ConstantExpr references to TLS variables", + false, false) + +// This removes ConstantExpr references to the given Constant. 
+static void expandConstExpr(Constant *Expr) { + // First, ensure that ConstantExpr references to Expr are converted + // to Instructions so that we can modify them. + for (Value::use_iterator UI = Expr->use_begin(); + UI != Expr->use_end(); + ++UI) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { + expandConstExpr(CE); + } + } + Expr->removeDeadConstantUsers(); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Expr)) { + while (!Expr->use_empty()) { + Instruction *U = cast<Instruction>(*Expr->use_begin()); + Instruction *InsertPt = U; + if (PHINode *PN = dyn_cast<PHINode>(InsertPt)) { + // We cannot insert instructions before a PHI node, so insert + // before the incoming block's terminator. This could be + // suboptimal if the terminator is a conditional. + InsertPt = PN->getIncomingBlock(Expr->use_begin())->getTerminator(); + } + Instruction *NewInst = CE->getAsInstruction(); + NewInst->insertBefore(InsertPt); + NewInst->setName("expanded"); + U->replaceUsesOfWith(CE, NewInst); + } + } +} + +bool ExpandTlsConstantExpr::runOnModule(Module &M) { + for (Module::global_iterator Global = M.global_begin(); + Global != M.global_end(); + ++Global) { + if (Global->isThreadLocal()) { + expandConstExpr(Global); + } + } + return true; +} + +ModulePass *llvm::createExpandTlsConstantExprPass() { + return new ExpandTlsConstantExpr(); +} diff --git a/test/Transforms/NaCl/expand-tls-aligned.ll b/test/Transforms/NaCl/expand-tls-aligned.ll new file mode 100644 index 0000000000..75f03ba306 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-aligned.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +target datalayout = "p:32:32:32" + + +@var = global i32 123 + +; Put this first to check that the pass handles BSS variables last. +@bss_tvar_aligned = thread_local global i32 0, align 64 + +@tvar1 = thread_local global i16 234 +; Test a pointer to check we are getting the right pointer size. 
+@tvar2 = thread_local global i32* @var +@tvar_aligned = thread_local global i8 99, align 32 + + +; CHECK: %tls_init_template = type <{ i16, [2 x i8], i32*, [24 x i8], i8 }> +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> + +; This struct type must be "packed" because the 31 byte padding here +; is followed by an i32. +; CHECK: %tls_bss_template = type <{ [31 x i8], i32, [60 x i8] }> + +; CHECK: @__tls_template_start = internal constant %tls_init_template <{ i16 234, [2 x i8] zeroinitializer, i32* @var, [24 x i8] zeroinitializer, i8 99 }> + +; CHECK: @__tls_template_alignment = internal constant i32 64 + + +; Create references to __tls_template_* to keep these live, otherwise +; the definition of %tls_struct (which we check for above) is removed +; from the output. + +@__tls_template_tdata_end = external global i8 +@__tls_template_end = external global i8 + +define i8* @get_tls_template_tdata_end() { + ret i8* @__tls_template_tdata_end +} + +define i8* @get_tls_template_end() { + ret i8* @__tls_template_end +} diff --git a/test/Transforms/NaCl/expand-tls-bss.ll b/test/Transforms/NaCl/expand-tls-bss.ll new file mode 100644 index 0000000000..02504611f0 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-bss.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + + +@tvar_bss1 = thread_local global i64 0 +@tvar_bss2 = thread_local global i32 0 + + +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> +; CHECK: %tls_bss_template = type <{ i64, i32, [4 x i8] }> + + +define i64* @get_tvar_bss1() { + ret i64* @tvar_bss1 +} +; CHECK: define i64* @get_tvar_bss1() +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 1, i32 0 +; CHECK: ret i64* %field diff --git a/test/Transforms/NaCl/expand-tls-constexpr.ll b/test/Transforms/NaCl/expand-tls-constexpr.ll new file mode 100644 index 0000000000..06bb8ed830 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-constexpr.ll @@ -0,0 +1,116 @@ +; RUN: opt 
< %s -nacl-expand-tls-constant-expr -S | FileCheck %s + +@tvar = thread_local global i32 0 + + +define i32 @test_converting_ptrtoint() { + ret i32 ptrtoint (i32* @tvar to i32) +} +; CHECK: define i32 @test_converting_ptrtoint() +; CHECK: %expanded = ptrtoint i32* @tvar to i32 +; CHECK: ret i32 %expanded + + +define i32 @test_converting_add() { + ret i32 add (i32 ptrtoint (i32* @tvar to i32), i32 4) +} +; CHECK: define i32 @test_converting_add() +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %expanded = add i32 %expanded1, 4 +; CHECK: ret i32 %expanded + + +define i32 @test_converting_multiple_operands() { + ret i32 add (i32 ptrtoint (i32* @tvar to i32), + i32 ptrtoint (i32* @tvar to i32)) +} +; CHECK: define i32 @test_converting_multiple_operands() +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %expanded = add i32 %expanded1, %expanded1 +; CHECK: ret i32 %expanded + + +define i32 @test_allocating_new_var_name(i32 %expanded) { + %result = add i32 %expanded, ptrtoint (i32* @tvar to i32) + ret i32 %result +} +; CHECK: define i32 @test_allocating_new_var_name(i32 %expanded) +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %result = add i32 %expanded, %expanded1 +; CHECK: ret i32 %result + + +define i8* @test_converting_bitcast() { + ret i8* bitcast (i32* @tvar to i8*) +} +; CHECK: define i8* @test_converting_bitcast() +; CHECK: %expanded = bitcast i32* @tvar to i8* +; CHECK: ret i8* %expanded + + +define i32* @test_converting_getelementptr() { + ; Use an index >1 to ensure that "inbounds" is not added automatically. + ret i32* getelementptr (i32* @tvar, i32 2) +} +; CHECK: define i32* @test_converting_getelementptr() +; CHECK: %expanded = getelementptr i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +; This is identical to @test_converting_getelementptr(). +; We need to check that both copies of getelementptr are fixed. 
+define i32* @test_converting_getelementptr_copy() { + ret i32* getelementptr (i32* @tvar, i32 2) +} +; CHECK: define i32* @test_converting_getelementptr_copy() +; CHECK: %expanded = getelementptr i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +define i32* @test_converting_getelementptr_inbounds() { + ret i32* getelementptr inbounds (i32* @tvar, i32 2) +} +; CHECK: define i32* @test_converting_getelementptr_inbounds() +; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +define i32* @test_converting_phi(i1 %cmp) { +entry: + br i1 %cmp, label %return, label %else + +else: + br label %return + +return: + %result = phi i32* [ getelementptr (i32* @tvar, i32 1), %entry ], [ null, %else ] + ret i32* %result +} +; The converted ConstantExprs get pushed back into the PHI node's +; incoming block, which might be suboptimal but works in all cases. +; CHECK: define i32* @test_converting_phi(i1 %cmp) +; CHECK: entry: +; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 1 +; CHECK: else: +; CHECK: return: +; CHECK: %result = phi i32* [ %expanded, %entry ], [ null, %else ] + + +@addr1 = global i8* blockaddress(@test_converting_phi_with_indirectbr, %return) +@addr2 = global i8* blockaddress(@test_converting_phi_with_indirectbr, %else) +define i32* @test_converting_phi_with_indirectbr(i8* %addr) { +entry: + indirectbr i8* %addr, [ label %return, label %else ] + +else: + br label %return + +return: + %result = phi i32* [ getelementptr (i32* @tvar, i32 1), %entry ], [ null, %else ] + ret i32* %result +} +; CHECK: define i32* @test_converting_phi_with_indirectbr(i8* %addr) +; CHECK: entry: +; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 1 +; CHECK: return: +; CHECK: %result = phi i32* [ %expanded, %entry ], [ null, %else ] diff --git a/test/Transforms/NaCl/expand-tls-constexpr2.ll b/test/Transforms/NaCl/expand-tls-constexpr2.ll new file mode 100644 index 0000000000..ca7054961b --- /dev/null +++ 
b/test/Transforms/NaCl/expand-tls-constexpr2.ll @@ -0,0 +1,12 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +@tvar = thread_local global i32 0 + +define i32 @get_tvar() { + ret i32 ptrtoint (i32* @tvar to i32) +} +; CHECK: %tls_raw = call i8* @llvm.nacl.read.tp() +; CHECK: %tls_struct = bitcast i8* %tls_raw to %tls_struct* +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 1, i32 0 +; CHECK: %expanded = ptrtoint i32* %field to i32 +; CHECK: ret i32 %expanded diff --git a/test/Transforms/NaCl/expand-tls-phi.ll b/test/Transforms/NaCl/expand-tls-phi.ll new file mode 100644 index 0000000000..0292a1d633 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-phi.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + + +@tvar = thread_local global i32 123 + +define i32* @get_tvar(i1 %cmp) { +entry: + br i1 %cmp, label %return, label %else + +else: + br label %return + +return: + %result = phi i32* [ @tvar, %entry ], [ null, %else ] + ret i32* %result +} +; The TLS access gets pushed back into the PHI node's incoming block, +; which might be suboptimal but works in all cases. 
+; CHECK: entry: +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 0 +; CHECK: else: +; CHECK: return: +; CHECK: %result = phi i32* [ %field, %entry ], [ null, %else ] diff --git a/test/Transforms/NaCl/expand-tls.ll b/test/Transforms/NaCl/expand-tls.ll new file mode 100644 index 0000000000..ec572ffa2c --- /dev/null +++ b/test/Transforms/NaCl/expand-tls.ll @@ -0,0 +1,85 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +; All thread-local variables should be removed +; RUN: opt < %s -nacl-expand-tls -S | not grep thread_local + + +@tvar1 = thread_local global i64 123 +@tvar2 = thread_local global i32 456 + + +; CHECK: %tls_init_template = type <{ i64, i32 }> +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> +; CHECK: %tls_bss_template = type <{ [4 x i8] }> + + +; CHECK: @__tls_template_start = internal constant %tls_init_template <{ i64 123, i32 456 }> + +; CHECK: @__tls_template_alignment = internal constant i32 8 + + +define i64* @get_tvar1() { + ret i64* @tvar1 +} +; CHECK: define i64* @get_tvar1() +; CHECK: %tls_raw = call i8* @llvm.nacl.read.tp() +; CHECK: %tls_struct = bitcast i8* %tls_raw to %tls_struct* +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 0 +; CHECK: ret i64* %field + + +define i32* @get_tvar2() { + ret i32* @tvar2 +} +; Much the same as for get_tvar1. 
+; CHECK: define i32* @get_tvar2() +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 1 + + +; Check that we define global variables for TLS templates + +@__tls_template_start = external global i8 +@__tls_template_tdata_end = external global i8 +@__tls_template_end = external global i8 + +define i8* @get_tls_template_start() { + ret i8* @__tls_template_start +} +; CHECK: define i8* @get_tls_template_start() +; CHECK: ret i8* bitcast (%tls_init_template* @__tls_template_start to i8*) + +define i8* @get_tls_template_tdata_end() { + ret i8* @__tls_template_tdata_end +} +; CHECK: define i8* @get_tls_template_tdata_end() +; CHECK: ret i8* bitcast (%tls_init_template* getelementptr inbounds (%tls_init_template* @__tls_template_start, i32 1) to i8*) + +define i8* @get_tls_template_end() { + ret i8* @__tls_template_end +} +; CHECK: define i8* @get_tls_template_end() +; CHECK: ret i8* bitcast (%tls_struct* getelementptr (%tls_struct* bitcast (%tls_init_template* @__tls_template_start to %tls_struct*), i32 1) to i8*) + + +; Check that we expand out the TLS layout intrinsics + +declare i32 @llvm.nacl.tp.tls.offset(i32) +declare i32 @llvm.nacl.tp.tdb.offset(i32) + +define i32 @test_get_tp_tls_offset(i32 %tls_size) { + %offset = call i32 @llvm.nacl.tp.tls.offset(i32 %tls_size) + ret i32 %offset +} +; Uses of the intrinsic are replaced with uses of a regular function. +; CHECK: define i32 @test_get_tp_tls_offset +; CHECK: call i32 @nacl_tp_tls_offset +; RUN: opt < %s -nacl-expand-tls -S | not grep llvm.nacl.tp.tls.offset + +define i32 @test_get_tp_tdb_offset(i32 %tdb_size) { + %offset = call i32 @llvm.nacl.tp.tdb.offset(i32 %tdb_size) + ret i32 %offset +} +; Uses of the intrinsic are replaced with uses of a regular function. 
+; CHECK: define i32 @test_get_tp_tdb_offset +; CHECK: call i32 @nacl_tp_tdb_offset +; RUN: opt < %s -nacl-expand-tls -S | not grep llvm.nacl.tp.tdb.offset diff --git a/test/Transforms/NaCl/lit.local.cfg b/test/Transforms/NaCl/lit.local.cfg new file mode 100644 index 0000000000..a43fd3ebdd --- /dev/null +++ b/test/Transforms/NaCl/lit.local.cfg @@ -0,0 +1,3 @@ +# -*- Python -*- + +config.suffixes = ['.ll'] diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 2f91207bca..6d9787513a 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -581,6 +581,8 @@ int main(int argc, char **argv) { initializeInstrumentation(Registry); initializeTarget(Registry); initializeExpandCtorsPass(Registry); + initializeExpandTlsPass(Registry); + initializeExpandTlsConstantExprPass(Registry); cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .bc modular optimizer and analysis printer\n"); |