aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Seaborn <mseaborn@chromium.org>2012-11-29 18:42:19 -0800
committerMark Seaborn <mseaborn@chromium.org>2012-11-29 18:42:19 -0800
commitffb0eedef4f034996ce59aac3176482617a8044c (patch)
tree03195b67f4e4770e961a4487fcdc5ac9e52bbb17
parent7a51b9c26dfc7fb9df8047c553227b47d64ba5c6 (diff)
PNaCl: Add ExpandTls pass for expanding out static TLS variables
This replaces each reference to a TLS variable "foo" with the LLVM IR equivalent of the expression: ((struct tls_template *) __nacl_read_tp())->foo This pass fills out the global variables __tls_template_start, __tls_template_end etc. which are used by src/untrusted/nacl/tls.c. These are the symbols that are otherwise defined by a binutils linker script. In order to handle the case of TLS variables that occur inside ConstantExprs, we have a helper pass, ExpandTlsConstantExpr. BUG=http://code.google.com/p/nativeclient/issues/detail?id=2837 TEST=test/Transforms/NaCl/expand-tls*.ll Review URL: https://chromiumcodereview.appspot.com/10896042
-rw-r--r--include/llvm/InitializePasses.h2
-rw-r--r--include/llvm/Transforms/NaCl.h2
-rw-r--r--lib/Transforms/NaCl/CMakeLists.txt2
-rw-r--r--lib/Transforms/NaCl/ExpandTls.cpp351
-rw-r--r--lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp110
-rw-r--r--test/Transforms/NaCl/expand-tls-aligned.ll42
-rw-r--r--test/Transforms/NaCl/expand-tls-bss.ll17
-rw-r--r--test/Transforms/NaCl/expand-tls-constexpr.ll116
-rw-r--r--test/Transforms/NaCl/expand-tls-constexpr2.ll12
-rw-r--r--test/Transforms/NaCl/expand-tls-phi.ll23
-rw-r--r--test/Transforms/NaCl/expand-tls.ll85
-rw-r--r--test/Transforms/NaCl/lit.local.cfg3
-rw-r--r--tools/opt/opt.cpp2
13 files changed, 767 insertions, 0 deletions
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index a6b7d31817..c794308f73 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -266,6 +266,8 @@ void initializeLoopVectorizePass(PassRegistry&);
void initializeBBVectorizePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
void initializeExpandCtorsPass(PassRegistry&); // @LOCALMOD
+void initializeExpandTlsPass(PassRegistry&); // @LOCALMOD
+void initializeExpandTlsConstantExprPass(PassRegistry&); // @LOCALMOD
void initializeNaClCcRewritePass(PassRegistry&); // @LOCALMOD
}
diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h
index fe29463a8b..79c9b9fe79 100644
--- a/include/llvm/Transforms/NaCl.h
+++ b/include/llvm/Transforms/NaCl.h
@@ -15,6 +15,8 @@ namespace llvm {
class ModulePass;
ModulePass *createExpandCtorsPass();
+ModulePass *createExpandTlsPass();
+ModulePass *createExpandTlsConstantExprPass();
}
diff --git a/lib/Transforms/NaCl/CMakeLists.txt b/lib/Transforms/NaCl/CMakeLists.txt
index d634ad9655..5e24cc7e28 100644
--- a/lib/Transforms/NaCl/CMakeLists.txt
+++ b/lib/Transforms/NaCl/CMakeLists.txt
@@ -1,5 +1,7 @@
add_llvm_library(LLVMTransformsNaCl
ExpandCtors.cpp
+ ExpandTls.cpp
+ ExpandTlsConstantExpr.cpp
)
add_dependencies(LLVMTransformsNaCl intrinsics_gen)
diff --git a/lib/Transforms/NaCl/ExpandTls.cpp b/lib/Transforms/NaCl/ExpandTls.cpp
new file mode 100644
index 0000000000..8ce439c018
--- /dev/null
+++ b/lib/Transforms/NaCl/ExpandTls.cpp
@@ -0,0 +1,351 @@
+//===- ExpandTls.cpp - Convert TLS variables to a concrete layout----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands out uses of thread-local (TLS) variables into
+// more primitive operations.
+//
+// A reference to the address of a TLS variable is expanded into code
+// which gets the current thread's thread pointer using
+// @llvm.nacl.read.tp() and adds a fixed offset.
+//
+// This pass allocates the offsets (relative to the thread pointer)
+// that will be used for TLS variables. It sets up the global
+// variables __tls_template_start, __tls_template_end etc. to contain
+// a template for initializing TLS variables' values for each thread.
+// This is a task normally performed by the linker in ELF systems.
+//
+//===----------------------------------------------------------------------===//
+
+#include <vector>
+
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/NaCl.h"
+
+using namespace llvm;
+
+namespace {
+  // Records where a single TLS variable ended up in the TLS template.
+  struct VarInfo {
+    GlobalVariable *TlsVar;  // The thread-local variable being expanded
+    bool IsBss; // Whether variable is in zero-initialized part of template
+    int TemplateIndex;  // Field index inside the init or BSS sub-struct
+  };
+
+  // Running layout state shared by the helpers that build the TLS
+  // template.
+  class PassState {
+  public:
+    // 'explicit' stops a bare Module* from being converted to a
+    // PassState implicitly.  The data layout is derived from the module;
+    // layout starts at offset 0 with byte alignment.
+    explicit PassState(Module *M): M(M), DL(M), Offset(0), Alignment(1) {}
+
+    Module *M;
+    DataLayout DL;
+    // Number of template bytes laid out so far, counting both padding
+    // and variables.
+    uint64_t Offset;
+    // 'Alignment' is the maximum variable alignment seen so far, in
+    // bytes.  After visiting all TLS variables, this is the overall
+    // alignment required for the TLS template.
+    uint32_t Alignment;
+  };
+
+  // Module pass that replaces TLS variables with thread-pointer-relative
+  // accesses and emits the __tls_template_* symbols.
+  class ExpandTls : public ModulePass {
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    ExpandTls() : ModulePass(ID) {
+      initializeExpandTlsPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnModule(Module &M);
+  };
+}
+
+char ExpandTls::ID = 0;  // Definition of the static member declared in ExpandTls
+INITIALIZE_PASS(ExpandTls, "nacl-expand-tls",  // Option name, as in "opt -nacl-expand-tls"
+ "Expand out TLS variables and fix TLS variable layout",
+ false, false)
+
+// Resolves the global declaration called Name to Value.
+//
+// If the module declares Name, every use of the declaration is
+// redirected to Value (bitcast to the declaration's pointer type) and
+// the declaration is erased.  If Name is absent, only a warning is
+// printed.  It is a fatal error for Name to already carry an
+// initializer.
+static void setGlobalVariableValue(Module &M, const char *Name,
+                                   Constant *Value) {
+  GlobalVariable *Decl = M.getNamedGlobal(Name);
+  if (Decl == NULL) {
+    // This warning can happen in a program that does not use a libc
+    // and does not initialize TLS variables.  Such a program might be
+    // linked with "-nostdlib".
+    errs() << "Warning: Variable " << Name << " not referenced\n";
+    return;
+  }
+
+  if (Decl->hasInitializer()) {
+    report_fatal_error(std::string("Variable ") + Name +
+                       " already has an initializer");
+  }
+
+  Constant *Replacement = ConstantExpr::getBitCast(Value, Decl->getType());
+  Decl->replaceAllUsesWith(Replacement);
+  Decl->eraseFromParent();
+}
+
+// Appends an [N x i8] padding field to the template, if one is needed
+// to bring State->Offset up to a multiple of Alignment, and records
+// Alignment as the template's alignment when it exceeds the current
+// maximum.
+//
+// FieldValues may be NULL (used for the BSS part, which has no
+// initializer values); otherwise a zero value is appended for the
+// padding.  The mask arithmetic relies on Alignment being a power of
+// two.
+static void padToAlignment(PassState *State,
+                           std::vector<Type*> *FieldTypes,
+                           std::vector<Constant*> *FieldValues,
+                           unsigned Alignment) {
+  const uint64_t Misalignment = State->Offset & (Alignment - 1);
+  if (Misalignment != 0) {
+    unsigned PadSize = Alignment - Misalignment;
+    Type *PadType =
+        ArrayType::get(Type::getInt8Ty(State->M->getContext()), PadSize);
+    FieldTypes->push_back(PadType);
+    if (FieldValues != NULL)
+      FieldValues->push_back(Constant::getNullValue(PadType));
+    State->Offset += PadSize;
+  }
+
+  // Track the strictest alignment requested so far.
+  if (Alignment > State->Alignment)
+    State->Alignment = Alignment;
+}
+
+// Appends TlsVar to the template: first any padding needed for the
+// variable's preferred alignment, then a field of the variable's value
+// type.  When FieldValues is non-NULL the variable's initializer is
+// appended alongside.  State->Offset advances by the type's alloc size.
+static void addVarToTlsTemplate(PassState *State,
+                                std::vector<Type*> *FieldTypes,
+                                std::vector<Constant*> *FieldValues,
+                                GlobalVariable *TlsVar) {
+  padToAlignment(State, FieldTypes, FieldValues,
+                 State->DL.getPreferredAlignment(TlsVar));
+
+  Type *VarType = TlsVar->getType()->getElementType();
+  FieldTypes->push_back(VarType);
+  if (FieldValues != NULL)
+    FieldValues->push_back(TlsVar->getInitializer());
+  State->Offset += State->DL.getTypeAllocSize(VarType);
+}
+
+// Assigns every thread-local variable in M a slot in the TLS template
+// and defines the template symbols.
+//
+// On return, TlsVars holds one entry per TLS variable (initialized
+// variables first, then zero-initialized ones) giving the sub-struct
+// and field index it was assigned.  The return value is the type
+// "pointer to %tls_struct", where %tls_struct is the packed pair
+// { %tls_init_template, %tls_bss_template }.
+//
+// A TLS variable without an initializer is a fatal error.
+static PointerType *buildTlsTemplate(Module &M, std::vector<VarInfo> *TlsVars) {
+  LLVMContext &Ctx = M.getContext();
+  PassState State(&M);
+  std::vector<Type*> InitTypes;
+  std::vector<Constant*> InitValues;
+  std::vector<Type*> BssTypes;
+
+  // First pass: validate all TLS variables and lay out the ones with a
+  // non-zero initializer.
+  for (Module::global_iterator GV = M.global_begin(), E = M.global_end();
+       GV != E; ++GV) {
+    if (!GV->isThreadLocal())
+      continue;
+    if (!GV->hasInitializer()) {
+      // Since this is a whole-program transformation, "extern" TLS
+      // variables are not allowed at this point.
+      report_fatal_error(std::string("TLS variable without an initializer: ")
+                         + GV->getName());
+    }
+    if (GV->getInitializer()->isNullValue())
+      continue;
+
+    addVarToTlsTemplate(&State, &InitTypes, &InitValues, GV);
+    VarInfo Info;
+    Info.TlsVar = GV;
+    Info.IsBss = false;
+    Info.TemplateIndex = InitTypes.size() - 1;
+    TlsVars->push_back(Info);
+  }
+
+  // Second pass: zero-initialized TLS variables, which must follow the
+  // non-zero-initialized ones in the template.
+  for (Module::global_iterator GV = M.global_begin(), E = M.global_end();
+       GV != E; ++GV) {
+    if (!GV->isThreadLocal() || !GV->getInitializer()->isNullValue())
+      continue;
+
+    addVarToTlsTemplate(&State, &BssTypes, NULL, GV);
+    VarInfo Info;
+    Info.TlsVar = GV;
+    Info.IsBss = true;
+    Info.TemplateIndex = BssTypes.size() - 1;
+    TlsVars->push_back(Info);
+  }
+
+  // Add final alignment padding so that
+  //   (struct tls_struct *) __nacl_read_tp() - 1
+  // gives the correct, aligned start of the TLS variables given the
+  // x86-style layout we are using.  This requires some more bytes to
+  // be memset() to zero at runtime.  This wastage doesn't seem
+  // important given that we're not trying to optimize packing by
+  // reordering to put similarly-aligned variables together.
+  padToAlignment(&State, &BssTypes, NULL, State.Alignment);
+
+  // The template structs are "packed" because the padding is inserted
+  // explicitly above; implicit padding added by LLVM would interfere
+  // with it.  tls_bss_template can start at a non-aligned address
+  // immediately following the last field in tls_init_template.
+  StructType *InitTemplateType = StructType::create(Ctx, "tls_init_template");
+  InitTemplateType->setBody(InitTypes, /*isPacked=*/true);
+  StructType *BssTemplateType = StructType::create(Ctx, "tls_bss_template");
+  BssTemplateType->setBody(BssTypes, /*isPacked=*/true);
+
+  StructType *TemplateType = StructType::create(Ctx, "tls_struct");
+  SmallVector<Type*, 2> TopLevelFields;
+  TopLevelFields.push_back(InitTemplateType);
+  TopLevelFields.push_back(BssTemplateType);
+  TemplateType->setBody(TopLevelFields, /*isPacked=*/true);
+  PointerType *TemplatePtrType = PointerType::get(TemplateType, 0);
+
+  // We define the following symbols, which are the same as those
+  // defined by NaCl's original customized binutils linker scripts:
+  //   __tls_template_start
+  //   __tls_template_tdata_end
+  //   __tls_template_end
+  // We also define __tls_template_alignment, which was not defined by
+  // the original linker scripts.
+
+  // __tls_template_start: the initialized part of the template.  The
+  // new variable takes the symbol's name only after any existing
+  // declaration of that name has been erased.
+  const char *StartSymbol = "__tls_template_start";
+  GlobalVariable *TemplateDataVar = new GlobalVariable(
+      M, InitTemplateType, /*isConstant=*/true, GlobalValue::InternalLinkage,
+      ConstantStruct::get(InitTemplateType, InitValues));
+  setGlobalVariableValue(M, StartSymbol, TemplateDataVar);
+  TemplateDataVar->setName(StartSymbol);
+
+  Constant *One = ConstantInt::get(Ctx, APInt(32, 1));
+
+  // __tls_template_tdata_end: one tls_init_template past the start.
+  Constant *TdataEnd = ConstantExpr::getGetElementPtr(TemplateDataVar, One);
+  setGlobalVariableValue(M, "__tls_template_tdata_end", TdataEnd);
+
+  // __tls_template_end: one whole tls_struct past the start.
+  Constant *TotalEnd = ConstantExpr::getGetElementPtr(
+      ConstantExpr::getBitCast(TemplateDataVar, TemplatePtrType), One);
+  setGlobalVariableValue(M, "__tls_template_end", TotalEnd);
+
+  // __tls_template_alignment: an i32 constant holding the overall
+  // alignment computed above.
+  const char *AlignmentSymbol = "__tls_template_alignment";
+  GlobalVariable *AlignmentVar = new GlobalVariable(
+      M, Type::getInt32Ty(Ctx), /*isConstant=*/true,
+      GlobalValue::InternalLinkage,
+      ConstantInt::get(Ctx, APInt(32, State.Alignment)));
+  setGlobalVariableValue(M, AlignmentSymbol, AlignmentVar);
+  AlignmentVar->setName(AlignmentSymbol);
+
+  return TemplatePtrType;
+}
+
+// Replaces every use of each TLS variable in TlsVars with an address
+// computed from the thread pointer, then erases the variable.
+//
+//   M               - module being rewritten.
+//   TlsVars         - variables and template slots from buildTlsTemplate().
+//   TemplatePtrType - pointer-to-%tls_struct type used to index the slots.
+//
+// Every remaining user of a TLS variable must be an Instruction (the
+// cast<> below asserts this); ExpandTls::runOnModule runs the
+// ExpandTlsConstantExpr pass first to guarantee it.
+static void rewriteTlsVars(Module &M, std::vector<VarInfo> *TlsVars,
+                           PointerType *TemplatePtrType) {
+  // Set up the intrinsic that reads the thread pointer.
+  Type *i8 = Type::getInt8Ty(M.getContext());
+  FunctionType *ReadTpType = FunctionType::get(PointerType::get(i8, 0),
+                                               /*isVarArg=*/false);
+  AttrBuilder B;
+  B.addAttribute(Attributes::ReadOnly);
+  B.addAttribute(Attributes::NoUnwind);
+  AttrListPtr ReadTpAttrs = AttrListPtr().addAttr(
+      M.getContext(), AttrListPtr::FunctionIndex,
+      Attributes::get(M.getContext(), B));
+  Constant *ReadTpFunc = M.getOrInsertTargetIntrinsic("llvm.nacl.read.tp",
+                                                      ReadTpType,
+                                                      ReadTpAttrs);
+
+  // The iterator is deliberately not called "VarInfo": that would
+  // shadow the VarInfo type inside the loop.
+  for (std::vector<VarInfo>::iterator Info = TlsVars->begin();
+       Info != TlsVars->end();
+       ++Info) {
+    GlobalVariable *Var = Info->TlsVar;
+    while (!Var->use_empty()) {
+      Instruction *U = cast<Instruction>(*Var->use_begin());
+      Instruction *InsertPt = U;
+      PHINode *PN = dyn_cast<PHINode>(U);
+      BasicBlock *IncomingBB = NULL;
+      if (PN) {
+        // We cannot insert instructions before a PHI node, so insert
+        // before the incoming block's terminator.  Note that if the
+        // terminator is conditional, this could be suboptimal,
+        // because we might be calling ReadTpFunc unnecessarily.
+        IncomingBB = PN->getIncomingBlock(Var->use_begin());
+        InsertPt = IncomingBB->getTerminator();
+      }
+      Value *RawThreadPtr = CallInst::Create(ReadTpFunc, "tls_raw", InsertPt);
+      Value *TypedThreadPtr = new BitCastInst(RawThreadPtr, TemplatePtrType,
+                                              "tls_struct", InsertPt);
+      SmallVector<Value*, 3> Indexes;
+      // We use -1 because we use the x86-style TLS layout in which
+      // the TLS data is stored at addresses below the thread pointer.
+      // This is largely because a check in nacl_irt_thread_create()
+      // in irt/irt_thread.c requires the thread pointer to be a
+      // self-pointer on x86-32.
+      // TODO(mseaborn): I intend to remove that check because it is
+      // non-portable.  In the mean time, we want PNaCl pexes to work
+      // in older Chromium releases when translated to nexes.
+      Indexes.push_back(ConstantInt::get(
+          M.getContext(), APInt(32, -1)));
+      // Second index selects the init (0) or BSS (1) sub-struct.
+      Indexes.push_back(ConstantInt::get(
+          M.getContext(), APInt(32, Info->IsBss ? 1 : 0)));
+      // Third index selects the variable's field in that sub-struct.
+      Indexes.push_back(ConstantInt::get(
+          M.getContext(), APInt(32, Info->TemplateIndex)));
+      Value *TlsField = GetElementPtrInst::Create(TypedThreadPtr, Indexes,
+                                                  "field", InsertPt);
+      if (PN) {
+        // Only rewrite the PHI slots that come from IncomingBB.
+        // TlsField is defined in IncomingBB, so using it for an edge
+        // from a different predecessor would produce a value that
+        // does not dominate that edge.  Slots for other predecessors
+        // stay in the use list and are handled by later iterations,
+        // each getting an address computed in its own block.  All
+        // slots for IncomingBB (it may be listed more than once) are
+        // updated together so they keep the same value.
+        for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+          if (PN->getIncomingBlock(I) == IncomingBB &&
+              PN->getIncomingValue(I) == Var)
+            PN->setIncomingValue(I, TlsField);
+        }
+      } else {
+        U->replaceUsesOfWith(Var, TlsField);
+      }
+    }
+    Info->TlsVar->eraseFromParent();
+  }
+}
+
+// If the module declares a function called Name, redirects all of its
+// uses to Replacement and erases the declaration.  Does nothing when
+// Name is not present.
+static void replaceIntrinsicIfDeclared(Module &M, const char *Name,
+                                       Function *Replacement) {
+  Function *Intrinsic = M.getFunction(Name);
+  if (Intrinsic == NULL)
+    return;
+  Intrinsic->replaceAllUsesWith(Replacement);
+  Intrinsic->eraseFromParent();
+}
+
+// Provide fixed definitions for PNaCl's TLS layout intrinsics.  We
+// adopt the x86-style layout: ExpandTls will output a program that
+// uses the x86-style layout wherever it runs.  This overrides any
+// architecture-specific definitions of the intrinsics that the LLVM
+// backend might provide.
+//
+// Both replacement functions have type i32(i32) and internal linkage,
+// and are created whether or not the intrinsic they stand in for is
+// declared in the module.
+static void defineTlsLayoutIntrinsics(Module &M) {
+  LLVMContext &Ctx = M.getContext();
+  Type *Int32Ty = Type::getInt32Ty(Ctx);
+  SmallVector<Type*, 1> ParamTypes;
+  ParamTypes.push_back(Int32Ty);
+  FunctionType *OffsetFnType =
+      FunctionType::get(Int32Ty, ParamTypes, /*isVarArg=*/false);
+
+  // Define the intrinsic as follows:
+  //   uint32_t __nacl_tp_tdb_offset(uint32_t tdb_size) {
+  //     return 0;
+  //   }
+  // This means the thread pointer points to the TDB.
+  Function *TdbOffsetFn = Function::Create(
+      OffsetFnType, GlobalValue::InternalLinkage, "nacl_tp_tdb_offset", &M);
+  BasicBlock *TdbEntry = BasicBlock::Create(Ctx, "entry", TdbOffsetFn);
+  ReturnInst::Create(Ctx, ConstantInt::get(Ctx, APInt(32, 0)), TdbEntry);
+  replaceIntrinsicIfDeclared(M, "llvm.nacl.tp.tdb.offset", TdbOffsetFn);
+
+  // Define the intrinsic as follows:
+  //   uint32_t __nacl_tp_tls_offset(uint32_t tls_size) {
+  //     return -tls_size;
+  //   }
+  // This means the TLS variables are stored below the thread pointer.
+  Function *TlsOffsetFn = Function::Create(
+      OffsetFnType, GlobalValue::InternalLinkage, "nacl_tp_tls_offset", &M);
+  BasicBlock *TlsEntry = BasicBlock::Create(Ctx, "entry", TlsOffsetFn);
+  Value *SizeArg = TlsOffsetFn->arg_begin();
+  SizeArg->setName("size");
+  Value *Negated = BinaryOperator::CreateNeg(SizeArg, "result", TlsEntry);
+  ReturnInst::Create(Ctx, Negated, TlsEntry);
+  replaceIntrinsicIfDeclared(M, "llvm.nacl.tp.tls.offset", TlsOffsetFn);
+}
+
+// Runs the whole TLS expansion on M.  Always reports the module as
+// modified.
+bool ExpandTls::runOnModule(Module &M) {
+  // Convert ConstantExpr uses of TLS variables into instructions first,
+  // so that every remaining use can be rewritten in place.
+  ModulePass *ConstExprExpander = createExpandTlsConstantExprPass();
+  ConstExprExpander->runOnModule(M);
+  delete ConstExprExpander;
+
+  // Lay out the template, then rewrite the variables against it.
+  std::vector<VarInfo> Vars;
+  PointerType *TlsStructPtrType = buildTlsTemplate(M, &Vars);
+  rewriteTlsVars(M, &Vars, TlsStructPtrType);
+
+  defineTlsLayoutIntrinsics(M);
+
+  return true;
+}
+
+ModulePass *llvm::createExpandTlsPass() {  // Factory declared in llvm/Transforms/NaCl.h
+ return new ExpandTls();  // Heap-allocated; returned to the caller as a raw pointer
+}
diff --git a/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp b/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp
new file mode 100644
index 0000000000..90e007604f
--- /dev/null
+++ b/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp
@@ -0,0 +1,110 @@
+//===- ExpandTlsConstantExpr.cpp - Convert ConstantExprs to Instructions---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is a helper used by the ExpandTls pass.
+//
+// LLVM treats the address of a TLS variable as a ConstantExpr. This
+// is arguably a bug because the address of a TLS variable is *not* a
+// constant: it varies between threads.
+//
+// See http://llvm.org/bugs/show_bug.cgi?id=14353
+//
+// This is also a problem for the ExpandTls pass, which wants to use
+// replaceUsesOfWith() to replace each TLS variable with an
+// Instruction sequence that calls @llvm.nacl.read.tp(). This doesn't
+// work if the TLS variable is used inside other ConstantExprs,
+// because ConstantExprs are interned and are not associated with any
+// function, whereas each Instruction must be part of a function.
+//
+// To fix that problem, this pass converts ConstantExprs that
+// reference TLS variables into Instructions.
+//
+// For example, this use of a 'ptrtoint' ConstantExpr:
+//
+// ret i32 ptrtoint (i32* @tls_var to i32)
+//
+// is converted into this 'ptrtoint' Instruction:
+//
+// %expanded = ptrtoint i32* @tls_var to i32
+// ret i32 %expanded
+//
+//===----------------------------------------------------------------------===//
+
+#include <vector>
+
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/NaCl.h"
+
+using namespace llvm;
+
+namespace {
+ class ExpandTlsConstantExpr : public ModulePass {  // Helper pass: turns ConstantExpr uses of TLS variables into Instructions
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandTlsConstantExpr() : ModulePass(ID) {
+ initializeExpandTlsConstantExprPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnModule(Module &M);  // Visits every thread-local global; the definition always returns true
+ };
+}
+
+char ExpandTlsConstantExpr::ID = 0;  // Definition of the static member declared in ExpandTlsConstantExpr
+INITIALIZE_PASS(ExpandTlsConstantExpr, "nacl-expand-tls-constant-expr",  // Option name, as in "opt -nacl-expand-tls-constant-expr"
+ "Eliminate ConstantExpr references to TLS variables",
+ false, false)
+
+// This removes ConstantExpr references to the given Constant.
+//
+// ConstantExprs that use Expr are expanded recursively first.  Then, if
+// Expr is itself a ConstantExpr, each Instruction that uses it gets an
+// equivalent Instruction (named "expanded") inserted in front of it and
+// is rewritten to use that instead.  After the recursive step every
+// remaining user must be an Instruction; the cast<> below asserts it.
+static void expandConstExpr(Constant *Expr) {
+  // First, ensure that ConstantExpr references to Expr are converted
+  // to Instructions so that we can modify them.
+  for (Value::use_iterator UI = Expr->use_begin();
+       UI != Expr->use_end();
+       ++UI) {
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
+      expandConstExpr(CE);
+    }
+  }
+  Expr->removeDeadConstantUsers();
+
+  ConstantExpr *CE = dyn_cast<ConstantExpr>(Expr);
+  if (!CE)
+    return;
+
+  while (!Expr->use_empty()) {
+    Instruction *U = cast<Instruction>(*Expr->use_begin());
+    Instruction *InsertPt = U;
+    PHINode *PN = dyn_cast<PHINode>(U);
+    BasicBlock *IncomingBB = NULL;
+    if (PN) {
+      // We cannot insert instructions before a PHI node, so insert
+      // before the incoming block's terminator.  This could be
+      // suboptimal if the terminator is a conditional.
+      IncomingBB = PN->getIncomingBlock(Expr->use_begin());
+      InsertPt = IncomingBB->getTerminator();
+    }
+    Instruction *NewInst = CE->getAsInstruction();
+    NewInst->insertBefore(InsertPt);
+    NewInst->setName("expanded");
+    if (PN) {
+      // Only rewrite the PHI slots that come from IncomingBB.  NewInst
+      // lives in IncomingBB, so using it for an edge from a different
+      // predecessor would produce a value that does not dominate that
+      // edge.  Slots for other predecessors stay in the use list and
+      // get their own copy on a later iteration.  All slots for
+      // IncomingBB (it may be listed more than once) are updated
+      // together so they keep the same value.
+      for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+        if (PN->getIncomingBlock(I) == IncomingBB &&
+            PN->getIncomingValue(I) == CE)
+          PN->setIncomingValue(I, NewInst);
+      }
+    } else {
+      U->replaceUsesOfWith(CE, NewInst);
+    }
+  }
+}
+
+// Expands ConstantExpr references to every thread-local global in M.
+// Always reports the module as modified.
+bool ExpandTlsConstantExpr::runOnModule(Module &M) {
+  Module::global_iterator GV = M.global_begin();
+  while (GV != M.global_end()) {
+    if (GV->isThreadLocal())
+      expandConstExpr(GV);
+    ++GV;
+  }
+  return true;
+}
+
+ModulePass *llvm::createExpandTlsConstantExprPass() {  // Factory declared in llvm/Transforms/NaCl.h
+ return new ExpandTlsConstantExpr();  // Heap-allocated; returned to the caller as a raw pointer
+}
diff --git a/test/Transforms/NaCl/expand-tls-aligned.ll b/test/Transforms/NaCl/expand-tls-aligned.ll
new file mode 100644
index 0000000000..75f03ba306
--- /dev/null
+++ b/test/Transforms/NaCl/expand-tls-aligned.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s
+
+target datalayout = "p:32:32:32"
+
+
+@var = global i32 123
+
+; Put this first to check that the pass handles BSS variables last.
+@bss_tvar_aligned = thread_local global i32 0, align 64
+
+@tvar1 = thread_local global i16 234
+; Test a pointer to check we are getting the right pointer size.
+@tvar2 = thread_local global i32* @var
+@tvar_aligned = thread_local global i8 99, align 32
+
+
+; CHECK: %tls_init_template = type <{ i16, [2 x i8], i32*, [24 x i8], i8 }>
+; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }>
+
+; This struct type must be "packed" because the 31 byte padding here
+; is followed by an i32.
+; CHECK: %tls_bss_template = type <{ [31 x i8], i32, [60 x i8] }>
+
+; CHECK: @__tls_template_start = internal constant %tls_init_template <{ i16 234, [2 x i8] zeroinitializer, i32* @var, [24 x i8] zeroinitializer, i8 99 }>
+
+; CHECK: @__tls_template_alignment = internal constant i32 64
+
+
+; Create references to __tls_template_* to keep these live, otherwise
+; the definition of %tls_struct (which we check for above) is removed
+; from the output.
+
+@__tls_template_tdata_end = external global i8
+@__tls_template_end = external global i8
+
+define i8* @get_tls_template_tdata_end() {
+ ret i8* @__tls_template_tdata_end
+}
+
+define i8* @get_tls_template_end() {
+ ret i8* @__tls_template_end
+}
diff --git a/test/Transforms/NaCl/expand-tls-bss.ll b/test/Transforms/NaCl/expand-tls-bss.ll
new file mode 100644
index 0000000000..02504611f0
--- /dev/null
+++ b/test/Transforms/NaCl/expand-tls-bss.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s
+
+
+@tvar_bss1 = thread_local global i64 0
+@tvar_bss2 = thread_local global i32 0
+
+
+; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }>
+; CHECK: %tls_bss_template = type <{ i64, i32, [4 x i8] }>
+
+
+define i64* @get_tvar_bss1() {
+ ret i64* @tvar_bss1
+}
+; CHECK: define i64* @get_tvar_bss1()
+; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 1, i32 0
+; CHECK: ret i64* %field
diff --git a/test/Transforms/NaCl/expand-tls-constexpr.ll b/test/Transforms/NaCl/expand-tls-constexpr.ll
new file mode 100644
index 0000000000..06bb8ed830
--- /dev/null
+++ b/test/Transforms/NaCl/expand-tls-constexpr.ll
@@ -0,0 +1,116 @@
+; RUN: opt < %s -nacl-expand-tls-constant-expr -S | FileCheck %s
+
+@tvar = thread_local global i32 0
+
+
+define i32 @test_converting_ptrtoint() {
+ ret i32 ptrtoint (i32* @tvar to i32)
+}
+; CHECK: define i32 @test_converting_ptrtoint()
+; CHECK: %expanded = ptrtoint i32* @tvar to i32
+; CHECK: ret i32 %expanded
+
+
+define i32 @test_converting_add() {
+ ret i32 add (i32 ptrtoint (i32* @tvar to i32), i32 4)
+}
+; CHECK: define i32 @test_converting_add()
+; CHECK: %expanded1 = ptrtoint i32* @tvar to i32
+; CHECK: %expanded = add i32 %expanded1, 4
+; CHECK: ret i32 %expanded
+
+
+define i32 @test_converting_multiple_operands() {
+ ret i32 add (i32 ptrtoint (i32* @tvar to i32),
+ i32 ptrtoint (i32* @tvar to i32))
+}
+; CHECK: define i32 @test_converting_multiple_operands()
+; CHECK: %expanded1 = ptrtoint i32* @tvar to i32
+; CHECK: %expanded = add i32 %expanded1, %expanded1
+; CHECK: ret i32 %expanded
+
+
+define i32 @test_allocating_new_var_name(i32 %expanded) {
+ %result = add i32 %expanded, ptrtoint (i32* @tvar to i32)
+ ret i32 %result
+}
+; CHECK: define i32 @test_allocating_new_var_name(i32 %expanded)
+; CHECK: %expanded1 = ptrtoint i32* @tvar to i32
+; CHECK: %result = add i32 %expanded, %expanded1
+; CHECK: ret i32 %result
+
+
+define i8* @test_converting_bitcast() {
+ ret i8* bitcast (i32* @tvar to i8*)
+}
+; CHECK: define i8* @test_converting_bitcast()
+; CHECK: %expanded = bitcast i32* @tvar to i8*
+; CHECK: ret i8* %expanded
+
+
+define i32* @test_converting_getelementptr() {
+ ; Use an index >1 to ensure that "inbounds" is not added automatically.
+ ret i32* getelementptr (i32* @tvar, i32 2)
+}
+; CHECK: define i32* @test_converting_getelementptr()
+; CHECK: %expanded = getelementptr i32* @tvar, i32 2
+; CHECK: ret i32* %expanded
+
+
+; This is identical to @test_converting_getelementptr().
+; We need to check that both copies of getelementptr are fixed.
+define i32* @test_converting_getelementptr_copy() {
+ ret i32* getelementptr (i32* @tvar, i32 2)
+}
+; CHECK: define i32* @test_converting_getelementptr_copy()
+; CHECK: %expanded = getelementptr i32* @tvar, i32 2
+; CHECK: ret i32* %expanded
+
+
+define i32* @test_converting_getelementptr_inbounds() {
+ ret i32* getelementptr inbounds (i32* @tvar, i32 2)
+}
+; CHECK: define i32* @test_converting_getelementptr_inbounds()
+; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 2
+; CHECK: ret i32* %expanded
+
+
+define i32* @test_converting_phi(i1 %cmp) {
+entry:
+ br i1 %cmp, label %return, label %else
+
+else:
+ br label %return
+
+return:
+ %result = phi i32* [ getelementptr (i32* @tvar, i32 1), %entry ], [ null, %else ]
+ ret i32* %result
+}
+; The converted ConstantExprs get pushed back into the PHI node's
+; incoming block, which might be suboptimal but works in all cases.
+; CHECK: define i32* @test_converting_phi(i1 %cmp)
+; CHECK: entry:
+; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 1
+; CHECK: else:
+; CHECK: return:
+; CHECK: %result = phi i32* [ %expanded, %entry ], [ null, %else ]
+
+
+@addr1 = global i8* blockaddress(@test_converting_phi_with_indirectbr, %return)
+@addr2 = global i8* blockaddress(@test_converting_phi_with_indirectbr, %else)
+define i32* @test_converting_phi_with_indirectbr(i8* %addr) {
+entry:
+ indirectbr i8* %addr, [ label %return, label %else ]
+
+else:
+ br label %return
+
+return:
+ %result = phi i32* [ getelementptr (i32* @tvar, i32 1), %entry ], [ null, %else ]
+ ret i32* %result
+}
+; CHECK: define i32* @test_converting_phi_with_indirectbr(i8* %addr)
+; CHECK: entry:
+; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 1
+; CHECK: return:
+; CHECK: %result = phi i32* [ %expanded, %entry ], [ null, %else ]
diff --git a/test/Transforms/NaCl/expand-tls-constexpr2.ll b/test/Transforms/NaCl/expand-tls-constexpr2.ll
new file mode 100644
index 0000000000..ca7054961b
--- /dev/null
+++ b/test/Transforms/NaCl/expand-tls-constexpr2.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s
+
+@tvar = thread_local global i32 0
+
+define i32 @get_tvar() {
+ ret i32 ptrtoint (i32* @tvar to i32)
+}
+; CHECK: %tls_raw = call i8* @llvm.nacl.read.tp()
+; CHECK: %tls_struct = bitcast i8* %tls_raw to %tls_struct*
+; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 1, i32 0
+; CHECK: %expanded = ptrtoint i32* %field to i32
+; CHECK: ret i32 %expanded
diff --git a/test/Transforms/NaCl/expand-tls-phi.ll b/test/Transforms/NaCl/expand-tls-phi.ll
new file mode 100644
index 0000000000..0292a1d633
--- /dev/null
+++ b/test/Transforms/NaCl/expand-tls-phi.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s
+
+
+@tvar = thread_local global i32 123
+
+define i32* @get_tvar(i1 %cmp) {
+entry:
+ br i1 %cmp, label %return, label %else
+
+else:
+ br label %return
+
+return:
+ %result = phi i32* [ @tvar, %entry ], [ null, %else ]
+ ret i32* %result
+}
+; The TLS access gets pushed back into the PHI node's incoming block,
+; which might be suboptimal but works in all cases.
+; CHECK: entry:
+; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 0
+; CHECK: else:
+; CHECK: return:
+; CHECK: %result = phi i32* [ %field, %entry ], [ null, %else ]
diff --git a/test/Transforms/NaCl/expand-tls.ll b/test/Transforms/NaCl/expand-tls.ll
new file mode 100644
index 0000000000..ec572ffa2c
--- /dev/null
+++ b/test/Transforms/NaCl/expand-tls.ll
@@ -0,0 +1,85 @@
+; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s
+
+; All thread-local variables should be removed
+; RUN: opt < %s -nacl-expand-tls -S | not grep thread_local
+
+
+@tvar1 = thread_local global i64 123
+@tvar2 = thread_local global i32 456
+
+
+; CHECK: %tls_init_template = type <{ i64, i32 }>
+; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }>
+; CHECK: %tls_bss_template = type <{ [4 x i8] }>
+
+
+; CHECK: @__tls_template_start = internal constant %tls_init_template <{ i64 123, i32 456 }>
+
+; CHECK: @__tls_template_alignment = internal constant i32 8
+
+
+define i64* @get_tvar1() {
+ ret i64* @tvar1
+}
+; CHECK: define i64* @get_tvar1()
+; CHECK: %tls_raw = call i8* @llvm.nacl.read.tp()
+; CHECK: %tls_struct = bitcast i8* %tls_raw to %tls_struct*
+; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 0
+; CHECK: ret i64* %field
+
+
+define i32* @get_tvar2() {
+ ret i32* @tvar2
+}
+; Much the same as for get_tvar1.
+; CHECK: define i32* @get_tvar2()
+; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 1
+
+
+; Check that we define global variables for TLS templates
+
+@__tls_template_start = external global i8
+@__tls_template_tdata_end = external global i8
+@__tls_template_end = external global i8
+
+define i8* @get_tls_template_start() {
+ ret i8* @__tls_template_start
+}
+; CHECK: define i8* @get_tls_template_start()
+; CHECK: ret i8* bitcast (%tls_init_template* @__tls_template_start to i8*)
+
+define i8* @get_tls_template_tdata_end() {
+ ret i8* @__tls_template_tdata_end
+}
+; CHECK: define i8* @get_tls_template_tdata_end()
+; CHECK: ret i8* bitcast (%tls_init_template* getelementptr inbounds (%tls_init_template* @__tls_template_start, i32 1) to i8*)
+
+define i8* @get_tls_template_end() {
+ ret i8* @__tls_template_end
+}
+; CHECK: define i8* @get_tls_template_end()
+; CHECK: ret i8* bitcast (%tls_struct* getelementptr (%tls_struct* bitcast (%tls_init_template* @__tls_template_start to %tls_struct*), i32 1) to i8*)
+
+
+; Check that we expand out the TLS layout intrinsics
+
+declare i32 @llvm.nacl.tp.tls.offset(i32)
+declare i32 @llvm.nacl.tp.tdb.offset(i32)
+
+define i32 @test_get_tp_tls_offset(i32 %tls_size) {
+ %offset = call i32 @llvm.nacl.tp.tls.offset(i32 %tls_size)
+ ret i32 %offset
+}
+; Uses of the intrinsic are replaced with uses of a regular function.
+; CHECK: define i32 @test_get_tp_tls_offset
+; CHECK: call i32 @nacl_tp_tls_offset
+; RUN: opt < %s -nacl-expand-tls -S | not grep llvm.nacl.tp.tls.offset
+
+define i32 @test_get_tp_tdb_offset(i32 %tdb_size) {
+ %offset = call i32 @llvm.nacl.tp.tdb.offset(i32 %tdb_size)
+ ret i32 %offset
+}
+; Uses of the intrinsic are replaced with uses of a regular function.
+; CHECK: define i32 @test_get_tp_tdb_offset
+; CHECK: call i32 @nacl_tp_tdb_offset
+; RUN: opt < %s -nacl-expand-tls -S | not grep llvm.nacl.tp.tdb.offset
diff --git a/test/Transforms/NaCl/lit.local.cfg b/test/Transforms/NaCl/lit.local.cfg
new file mode 100644
index 0000000000..a43fd3ebdd
--- /dev/null
+++ b/test/Transforms/NaCl/lit.local.cfg
@@ -0,0 +1,3 @@
+# -*- Python -*-
+
+config.suffixes = ['.ll']
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 2f91207bca..6d9787513a 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -581,6 +581,8 @@ int main(int argc, char **argv) {
initializeInstrumentation(Registry);
initializeTarget(Registry);
initializeExpandCtorsPass(Registry);
+ initializeExpandTlsPass(Registry);
+ initializeExpandTlsConstantExprPass(Registry);
cl::ParseCommandLineOptions(argc, argv,
"llvm .bc -> .bc modular optimizer and analysis printer\n");