diff options
author | JF Bastien <jfb@chromium.org> | 2013-07-30 16:38:26 -0700 |
---|---|---|
committer | JF Bastien <jfb@chromium.org> | 2013-07-30 16:38:26 -0700 |
commit | f75fd0a9f95109b9cb13a74aad6dcc98c3d5d625 (patch) | |
tree | 652c465d76008ef58f5c8d55827050a6f1d875e1 | |
parent | 423b3bb89c78e96c59843aa7c6e55d01bde174d1 (diff) |
Rewrite ``asm("":::"memory")`` to ``fence seq_cst``
This is often used as a compiler barrier and should "just work" in user code.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=2345
R=eliben@chromium.org
TEST= (cd ./pnacl/build/llvm_x86_64 && ninja check-all)
Review URL: https://codereview.chromium.org/21178002
-rw-r--r-- | docs/PNaClDeveloperGuide.rst | 7 | ||||
-rw-r--r-- | include/llvm/InitializePasses.h | 1 | ||||
-rw-r--r-- | include/llvm/Transforms/NaCl.h | 1 | ||||
-rw-r--r-- | lib/Transforms/NaCl/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Transforms/NaCl/PNaClABISimplify.cpp | 4 | ||||
-rw-r--r-- | lib/Transforms/NaCl/RewriteAsmDirectives.cpp | 111 | ||||
-rw-r--r-- | test/Transforms/NaCl/rewrite-asm-memory.ll | 71 | ||||
-rw-r--r-- | tools/opt/opt.cpp | 1 |
8 files changed, 196 insertions, 1 deletions
diff --git a/docs/PNaClDeveloperGuide.rst b/docs/PNaClDeveloperGuide.rst index 7bd45dd6fd..b0a47423e3 100644 --- a/docs/PNaClDeveloperGuide.rst +++ b/docs/PNaClDeveloperGuide.rst @@ -130,3 +130,10 @@ As in C11/C++11: these primitives must always be of the same bit size for that location. - Not all memory orderings are valid for all atomic operations. +Inline Assembly +=============== + +Inline assembly isn't supported by PNaCl because it isn't portable. The +one current exception is the common compiler barrier idiom +``asm("":::"memory")``, which gets transformed to a sequentially +consistent memory barrier (equivalent to ``__sync_synchronize()``). diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index ee40f495b3..8352868d89 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -298,6 +298,7 @@ void initializePromoteIntegersPass(PassRegistry&); void initializeReplacePtrsWithIntsPass(PassRegistry&); void initializeResolveAliasesPass(PassRegistry&); void initializeResolvePNaClIntrinsicsPass(PassRegistry&); +void initializeRewriteAsmDirectivesPass(PassRegistry&); void initializeRewriteAtomicsPass(PassRegistry&); void initializeRewriteLLVMIntrinsicsPass(PassRegistry&); void initializeRewritePNaClLibraryCallsPass(PassRegistry&); diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h index ec1a54aaa8..3cf306499e 100644 --- a/include/llvm/Transforms/NaCl.h +++ b/include/llvm/Transforms/NaCl.h @@ -29,6 +29,7 @@ FunctionPass *createExpandStructRegsPass(); FunctionPass *createInsertDivideCheckPass(); FunctionPass *createPromoteIntegersPass(); FunctionPass *createResolvePNaClIntrinsicsPass(); +FunctionPass *createRewriteAsmDirectivesPass(); ModulePass *createAddPNaClExternalDeclsPass(); ModulePass *createCanonicalizeMemIntrinsicsPass(); ModulePass *createExpandArithWithOverflowPass(); diff --git a/lib/Transforms/NaCl/CMakeLists.txt b/lib/Transforms/NaCl/CMakeLists.txt index e91d79f184..909cd8c68d 100644 --- a/lib/Transforms/NaCl/CMakeLists.txt +++ b/lib/Transforms/NaCl/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMNaClTransforms PromoteIntegers.cpp ReplacePtrsWithInts.cpp ResolvePNaClIntrinsics.cpp + RewriteAsmDirectives.cpp RewriteAtomics.cpp RewriteLLVMIntrinsics.cpp RewritePNaClLibraryCalls.cpp diff --git a/lib/Transforms/NaCl/PNaClABISimplify.cpp b/lib/Transforms/NaCl/PNaClABISimplify.cpp index 189d87f2cb..4f8e0e4f87 100644 --- a/lib/Transforms/NaCl/PNaClABISimplify.cpp +++ b/lib/Transforms/NaCl/PNaClABISimplify.cpp @@ -30,8 +30,10 @@ void llvm::PNaClABISimplifyAddPreOptPasses(PassManager &PM) { // LowerExpect converts Intrinsic::expect into branch weights, // which can then be removed after BlockPlacement. PM.add(createLowerExpectIntrinsicPass()); - // Rewrite unsupported intrinsics to simpler constructs. + // Rewrite unsupported intrinsics and inline assembly directives to + // simpler and portable constructs. PM.add(createRewriteLLVMIntrinsicsPass()); + PM.add(createRewriteAsmDirectivesPass()); // LowerInvoke prevents use of C++ exception handling, which is not // yet supported in the PNaCl ABI. PM.add(createLowerInvokePass()); diff --git a/lib/Transforms/NaCl/RewriteAsmDirectives.cpp b/lib/Transforms/NaCl/RewriteAsmDirectives.cpp new file mode 100644 index 0000000000..cb726d2365 --- /dev/null +++ b/lib/Transforms/NaCl/RewriteAsmDirectives.cpp @@ -0,0 +1,111 @@ +//===- RewriteAsmDirectives.cpp - Handle Architecture-Independent Assembly-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass rewrites any inline assembly directive which is portable +// into LLVM bitcode. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/InstVisitor.h" +#include "llvm/Pass.h" +#include <string> + +using namespace llvm; + +namespace { +class RewriteAsmDirectives : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + RewriteAsmDirectives() : FunctionPass(ID) { + initializeRewriteAsmDirectivesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); +}; + +class AsmDirectivesVisitor : public InstVisitor<AsmDirectivesVisitor> { +public: + AsmDirectivesVisitor(Function &F) + : F(F), C(F.getParent()->getContext()), ModifiedFunction(false) {} + ~AsmDirectivesVisitor() {} + bool modifiedFunction() const { return ModifiedFunction; } + + /// Only Call Instructions are ever inline assembly directives. + void visitCallInst(CallInst &CI); + +private: + Function &F; + LLVMContext &C; + bool ModifiedFunction; + + AsmDirectivesVisitor() LLVM_DELETED_FUNCTION; + AsmDirectivesVisitor(const AsmDirectivesVisitor &) LLVM_DELETED_FUNCTION; + AsmDirectivesVisitor &operator=(const AsmDirectivesVisitor &) LLVM_DELETED_FUNCTION; +}; +} + +char RewriteAsmDirectives::ID = 0; +INITIALIZE_PASS( + RewriteAsmDirectives, "rewrite-asm-directives", + "rewrite portable inline assembly directives into non-asm LLVM IR", + false, false) + +bool RewriteAsmDirectives::runOnFunction(Function &F) { + AsmDirectivesVisitor AV(F); + AV.visit(F); + return AV.modifiedFunction(); +} + +void AsmDirectivesVisitor::visitCallInst(CallInst &CI) { + if (!CI.isInlineAsm()) + return; + + Instruction *Replacement = NULL; + + InlineAsm *IA = cast<InlineAsm>(CI.getCalledValue()); + std::string AsmStr(IA->getAsmString()); + std::string ConstraintStr(IA->getConstraintString()); + Type *T = CI.getType(); + + bool isEmptyAsm = AsmStr.empty(); + // Different triples will encode "touch everything" differently, e.g.: + // - le32-unknown-nacl has "~{memory}". + // - x86 "~{memory},~{dirflag},~{fpsr},~{flags}". + // The following code therefore only searches for memory: this pass + // deals with portable assembly, touching anything else than memory in + // an empty assembly statement is meaningless. + bool touchesMemory = ConstraintStr.find("~{memory}") != std::string::npos; + + if (T->isVoidTy() && IA->hasSideEffects() && isEmptyAsm && touchesMemory) { + // asm("":::"memory") => fence seq_cst + // This transformation is safe and strictly stronger: the former is + // purely a compiler fence, whereas the latter is a compiler fence + // as well as a hardware fence which orders all loads and stores on + // the current thread of execution. + Replacement = new FenceInst(C, SequentiallyConsistent, CrossThread, &CI); + } + + if (Replacement) { + Replacement->setDebugLoc(CI.getDebugLoc()); + CI.replaceAllUsesWith(Replacement); + CI.eraseFromParent(); + ModifiedFunction = true; + } +} + +namespace llvm { +FunctionPass *createRewriteAsmDirectivesPass() { + return new RewriteAsmDirectives(); +} +} diff --git a/test/Transforms/NaCl/rewrite-asm-memory.ll b/test/Transforms/NaCl/rewrite-asm-memory.ll new file mode 100644 index 0000000000..8481c831ee --- /dev/null +++ b/test/Transforms/NaCl/rewrite-asm-memory.ll @@ -0,0 +1,71 @@ +; RUN: opt < %s -rewrite-asm-directives -S | FileCheck %s +; RUN: opt < %s -O3 -rewrite-asm-directives -S | FileCheck %s +; RUN: opt < %s -O3 -rewrite-asm-directives -S | FileCheck %s -check-prefix=ELIM +; RUN: opt < %s -rewrite-asm-directives -S | FileCheck %s -check-prefix=CLEANED + +; Test that asm("":::"memory"), a compiler barrier, gets rewritten to a +; sequentially-consistent fence. The test is also run at O3 to make sure +; that loads and stores don't get unexpectedly eliminated. + +; CLEANED-NOT: asm + +@a = external global i32 +@b = external global i32 + +; Different triples encode "touch everything" constraints differently. +define void @memory_assembly_encoding_test() { +; CHECK: @memory_assembly_encoding_test() + call void asm sideeffect "", "~{memory}"() + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() + ; CHECK-NEXT: fence seq_cst + ; CHECK-NEXT: fence seq_cst + + ret void + ; CHECK-NEXT: ret void +} + +define void @memory_assembly_ordering_test() { +; CHECK: @memory_assembly_ordering_test() + %1 = load i32* @a, align 4 + store i32 %1, i32* @b, align 4 + call void asm sideeffect "", "~{memory}"() + ; CHECK-NEXT: %1 = load i32* @a, align 4 + ; CHECK-NEXT: store i32 %1, i32* @b, align 4 + ; CHECK-NEXT: fence seq_cst + + ; Redundant load from the previous location, and store to the same + ; location (making the previous one dead). Shouldn't get eliminated + ; because of the fence. + %2 = load i32* @a, align 4 + store i32 %2, i32* @b, align 4 + call void asm sideeffect "", "~{memory}"() + ; CHECK-NEXT: %2 = load i32* @a, align 4 + ; CHECK-NEXT: store i32 %2, i32* @b, align 4 + ; CHECK-NEXT: fence seq_cst + + ; Same here. + %3 = load i32* @a, align 4 + store i32 %3, i32* @b, align 4 + ; CHECK-NEXT: %3 = load i32* @a, align 4 + ; CHECK-NEXT: store i32 %3, i32* @b, align 4 + + ret void + ; CHECK-NEXT: ret void +} + +; Same function as above, but without the barriers. At O3 some loads and +; stores should get eliminated. +define void @memory_ordering_test() { +; ELIM: @memory_ordering_test() + %1 = load i32* @a, align 4 + store i32 %1, i32* @b, align 4 + %2 = load i32* @a, align 4 + store i32 %2, i32* @b, align 4 + %3 = load i32* @a, align 4 + store i32 %3, i32* @b, align 4 + ; ELIM-NEXT: %1 = load i32* @a, align 4 + ; ELIM-NEXT: store i32 %1, i32* @b, align 4 + + ret void + ; ELIM-NEXT: ret void +} diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index a9c19a33dc..5682a55909 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -626,6 +626,7 @@ int main(int argc, char **argv) { initializeReplacePtrsWithIntsPass(Registry); initializeResolveAliasesPass(Registry); initializeResolvePNaClIntrinsicsPass(Registry); + initializeRewriteAsmDirectivesPass(Registry); initializeRewriteAtomicsPass(Registry); initializeRewriteLLVMIntrinsicsPass(Registry); initializeRewritePNaClLibraryCallsPass(Registry); |