diff options
author | Mark Seaborn <mseaborn@chromium.org> | 2012-11-08 07:18:03 -0800 |
---|---|---|
committer | Mark Seaborn <mseaborn@chromium.org> | 2012-11-08 07:18:03 -0800 |
commit | ab80f55fb9bc042c115479c3b5debd86d3b72a6f (patch) | |
tree | 639d06f367080c0a049a12d292e04f78aff523ff | |
parent | 96b7ae0415ed0b161b66e57bb6092192ed330ec7 (diff) |
Add @llvm.nacl.read.tp() intrinsic, a fast version of NaCl's tls_get() IRT interface
This is in preparation for adding an LLVM pass that will expand out
TLS (thread_local) variable accesses into calls to llvm.nacl.read.tp.
On ARM, there is already an llvm.arm.thread.pointer intrinsic. We reuse
the code for that.
On x86, we have to add an implementation. The added code is based on
x86's LowerToTLSExecModel() for the %gs:0 case, and on NaCl-MIPS'
LowerGlobalTLSAddress() for the __nacl_read_tp() case. (In contrast,
X86NaClRewritePass.cpp inserts a __nacl_read_tp() call at the lower MI
level; we don't use that approach here.)
We convert LowerINTRINSIC_WO_CHAIN() into a method in order to access
the Subtarget member. This is consistent with other x86 Lower methods
and with the ARM version.
BUG=https://code.google.com/p/nativeclient/issues/detail?id=2837
TEST="llvm-lit test/NaCl"
Review URL: https://codereview.chromium.org/11383002
-rw-r--r-- | include/llvm/CodeGen/ISDOpcodes.h | 3 | ||||
-rw-r--r-- | include/llvm/Intrinsics.td | 4 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 34 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 1 | ||||
-rw-r--r-- | test/NaCl/ARM/nacl-read-tp-intrinsic.ll | 21 | ||||
-rw-r--r-- | test/NaCl/X86/lit.local.cfg | 6 | ||||
-rw-r--r-- | test/NaCl/X86/nacl-read-tp-intrinsic.ll | 36 |
8 files changed, 105 insertions, 1 deletions
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 95aafb324d..47170e4e58 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -643,6 +643,9 @@ namespace ISD { // @LOCALMOD-BEGIN // NACL_* - Native Client instrinsics. + // NACL_READ_TP is a fast built-in version of NaCl's tls_get() IRT + // interface. + NACL_READ_TP, // These correspond to functions in: // native_client/src/untrusted/nacl/tls_params.h NACL_TP_TLS_OFFSET, diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index 68d7d4cdf4..42b9da6914 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -467,6 +467,10 @@ def int_nacl_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_ptr_ty]>, def int_nacl_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>, GCCBuiltin<"__builtin_nacl_longjmp">; +// Fast built-in version of NaCl's tls_get() IRT interface. +def int_nacl_read_tp : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>, + GCCBuiltin<"__builtin_nacl_read_tp">; + // The following intrinsics provide target-specific implementations of // the interface in native_client/src/untrusted/nacl/tls_params.h. // The intrinsic names are basically the functions there without the diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 6cf8473f9d..6ab48f6244 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2587,6 +2587,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. 
+ case Intrinsic::nacl_read_tp: // @LOCALMOD case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b0fb2f9f68..972913ea1f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10129,11 +10129,43 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } -static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. + + // @LOCALMOD-BEGIN + case Intrinsic::nacl_read_tp: { + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + if (Subtarget->is64Bit() || llvm::TLSUseCall) { + // Call __nacl_read_tp() to get the thread pointer. + unsigned PtrSize = PtrVT.getSizeInBits(); + IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); + SDValue ReadTpFunction = DAG.getExternalSymbol("__nacl_read_tp", PtrVT); + ArgListTy Args; + TargetLowering::CallLoweringInfo CLI( + DAG.getEntryNode(), PtrTy, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*doesNotRet=*/false, + /*isReturnValueUsed=*/true, + ReadTpFunction, Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + return CallResult.first; + } else { + // Get %gs:0, which contains the thread pointer on x86-32. 
+ unsigned GSAddrSpace = 256; + Value *Ptr = Constant::getNullValue( + Type::getInt8PtrTy(*DAG.getContext(), GSAddrSpace)); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getIntPtrConstant(0), + MachinePointerInfo(Ptr), + false, false, false, 0); + } + } + // @LOCALMOD-END + // Comparison intrinsics. case Intrinsic::x86_sse_comieq_ss: case Intrinsic::x86_sse_comilt_ss: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 9c4fc95b56..39d24c35c7 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -830,6 +830,7 @@ namespace llvm { SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/NaCl/ARM/nacl-read-tp-intrinsic.ll b/test/NaCl/ARM/nacl-read-tp-intrinsic.ll new file mode 100644 index 0000000000..3ad5181149 --- /dev/null +++ b/test/NaCl/ARM/nacl-read-tp-intrinsic.ll @@ -0,0 +1,21 @@ + +; RUN: llc -mtriple=armv7-unknown-nacl -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -r -triple armv7 - \ +; RUN: | FileCheck -check-prefix=ARM %s + +; RUN: llc -mtriple=armv7-unknown-nacl -sfi-store -filetype=obj -mtls-use-call %s -o - \ +; RUN: | llvm-objdump -disassemble -r -triple armv7 - \ +; RUN: | FileCheck -check-prefix=ARM_IRT %s + + +declare i8* @llvm.nacl.read.tp() + +define i8* @get_thread_pointer() { + %tp = call i8* @llvm.nacl.read.tp() + ret i8* %tp +} + +; ARM: ldr r0, [r9] + +; ARM_IRT: bl # +; ARM_IRT-NEXT: __aeabi_read_tp diff --git a/test/NaCl/X86/lit.local.cfg b/test/NaCl/X86/lit.local.cfg new file mode 100644 index 0000000000..56bf008595 --- /dev/null 
+++ b/test/NaCl/X86/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.s'] + +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + diff --git a/test/NaCl/X86/nacl-read-tp-intrinsic.ll b/test/NaCl/X86/nacl-read-tp-intrinsic.ll new file mode 100644 index 0000000000..2ad27fb6a4 --- /dev/null +++ b/test/NaCl/X86/nacl-read-tp-intrinsic.ll @@ -0,0 +1,36 @@ + +; RUN: llc -mtriple=i386-unknown-nacl -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -r -triple i386 - \ +; RUN: | FileCheck -check-prefix=X32 %s + +; RUN: llc -mtriple=i386-unknown-nacl -filetype=obj -mtls-use-call %s -o - \ +; RUN: | llvm-objdump -disassemble -r -triple i386 - \ +; RUN: | FileCheck -check-prefix=X32_IRT %s + +; RUN: llc -mtriple=x86_64-unknown-nacl -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -r -triple x86_64 - \ +; RUN: | FileCheck -check-prefix=X64 %s + +; "-mtls-use-call" should not make any difference on x86-64. +; RUN: llc -mtriple=x86_64-unknown-nacl -filetype=obj -mtls-use-call %s -o - \ +; RUN: | llvm-objdump -disassemble -r -triple x86_64 - \ +; RUN: | FileCheck -check-prefix=X64 %s + + +declare i8* @llvm.nacl.read.tp() + +define i8* @get_thread_pointer() { + %tp = call i8* @llvm.nacl.read.tp() + ret i8* %tp +} + +; X32: movl %gs:0, %eax + +; There appears to be a bug in llvm-objdump which stops it from +; showing the symbol name "__nacl_read_tp" in the relocation output on +; x86-32. +; X32_IRT: call +; X32_IRT-NEXT: R_386_PC32 Unknown + +; X64: call +; X64-NEXT: __nacl_read_tp |