aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJF Bastien <jfb@chromium.org>2013-07-30 16:38:26 -0700
committerJF Bastien <jfb@chromium.org>2013-07-30 16:38:26 -0700
commitf75fd0a9f95109b9cb13a74aad6dcc98c3d5d625 (patch)
tree652c465d76008ef58f5c8d55827050a6f1d875e1
parent423b3bb89c78e96c59843aa7c6e55d01bde174d1 (diff)
Rewrite ``asm("":::"memory")`` to ``fence seq_cst``
This is often used as a compiler barrier and should "just work" in user code. BUG= https://code.google.com/p/nativeclient/issues/detail?id=2345 R=eliben@chromium.org TEST= (cd ./pnacl/build/llvm_x86_64 && ninja check-all) Review URL: https://codereview.chromium.org/21178002
-rw-r--r--docs/PNaClDeveloperGuide.rst7
-rw-r--r--include/llvm/InitializePasses.h1
-rw-r--r--include/llvm/Transforms/NaCl.h1
-rw-r--r--lib/Transforms/NaCl/CMakeLists.txt1
-rw-r--r--lib/Transforms/NaCl/PNaClABISimplify.cpp4
-rw-r--r--lib/Transforms/NaCl/RewriteAsmDirectives.cpp111
-rw-r--r--test/Transforms/NaCl/rewrite-asm-memory.ll71
-rw-r--r--tools/opt/opt.cpp1
8 files changed, 196 insertions, 1 deletions
diff --git a/docs/PNaClDeveloperGuide.rst b/docs/PNaClDeveloperGuide.rst
index 7bd45dd6fd..b0a47423e3 100644
--- a/docs/PNaClDeveloperGuide.rst
+++ b/docs/PNaClDeveloperGuide.rst
@@ -130,3 +130,10 @@ As in C11/C++11:
these primitives must always be of the same bit size for that location.
- Not all memory orderings are valid for all atomic operations.
+Inline Assembly
+===============
+
+Inline assembly isn't supported by PNaCl because it isn't portable. The
+one current exception is the common compiler barrier idiom
+``asm("":::"memory")``, which gets transformed to a sequentially
+consistent memory barrier (equivalent to ``__sync_synchronize()``).
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index ee40f495b3..8352868d89 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -298,6 +298,7 @@ void initializePromoteIntegersPass(PassRegistry&);
void initializeReplacePtrsWithIntsPass(PassRegistry&);
void initializeResolveAliasesPass(PassRegistry&);
void initializeResolvePNaClIntrinsicsPass(PassRegistry&);
+void initializeRewriteAsmDirectivesPass(PassRegistry&);
void initializeRewriteAtomicsPass(PassRegistry&);
void initializeRewriteLLVMIntrinsicsPass(PassRegistry&);
void initializeRewritePNaClLibraryCallsPass(PassRegistry&);
diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h
index ec1a54aaa8..3cf306499e 100644
--- a/include/llvm/Transforms/NaCl.h
+++ b/include/llvm/Transforms/NaCl.h
@@ -29,6 +29,7 @@ FunctionPass *createExpandStructRegsPass();
FunctionPass *createInsertDivideCheckPass();
FunctionPass *createPromoteIntegersPass();
FunctionPass *createResolvePNaClIntrinsicsPass();
+FunctionPass *createRewriteAsmDirectivesPass();
ModulePass *createAddPNaClExternalDeclsPass();
ModulePass *createCanonicalizeMemIntrinsicsPass();
ModulePass *createExpandArithWithOverflowPass();
diff --git a/lib/Transforms/NaCl/CMakeLists.txt b/lib/Transforms/NaCl/CMakeLists.txt
index e91d79f184..909cd8c68d 100644
--- a/lib/Transforms/NaCl/CMakeLists.txt
+++ b/lib/Transforms/NaCl/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_library(LLVMNaClTransforms
PromoteIntegers.cpp
ReplacePtrsWithInts.cpp
ResolvePNaClIntrinsics.cpp
+ RewriteAsmDirectives.cpp
RewriteAtomics.cpp
RewriteLLVMIntrinsics.cpp
RewritePNaClLibraryCalls.cpp
diff --git a/lib/Transforms/NaCl/PNaClABISimplify.cpp b/lib/Transforms/NaCl/PNaClABISimplify.cpp
index 189d87f2cb..4f8e0e4f87 100644
--- a/lib/Transforms/NaCl/PNaClABISimplify.cpp
+++ b/lib/Transforms/NaCl/PNaClABISimplify.cpp
@@ -30,8 +30,10 @@ void llvm::PNaClABISimplifyAddPreOptPasses(PassManager &PM) {
// LowerExpect converts Intrinsic::expect into branch weights,
// which can then be removed after BlockPlacement.
PM.add(createLowerExpectIntrinsicPass());
- // Rewrite unsupported intrinsics to simpler constructs.
+ // Rewrite unsupported intrinsics and inline assembly directives to
+ // simpler and portable constructs.
PM.add(createRewriteLLVMIntrinsicsPass());
+ PM.add(createRewriteAsmDirectivesPass());
// LowerInvoke prevents use of C++ exception handling, which is not
// yet supported in the PNaCl ABI.
PM.add(createLowerInvokePass());
diff --git a/lib/Transforms/NaCl/RewriteAsmDirectives.cpp b/lib/Transforms/NaCl/RewriteAsmDirectives.cpp
new file mode 100644
index 0000000000..cb726d2365
--- /dev/null
+++ b/lib/Transforms/NaCl/RewriteAsmDirectives.cpp
@@ -0,0 +1,111 @@
+//===- RewriteAsmDirectives.cpp - Handle Architecture-Independent Assembly-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass rewrites any inline assembly directive which is portable
+// into LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include <string>
+
+using namespace llvm;
+
+namespace {
+class RewriteAsmDirectives : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ RewriteAsmDirectives() : FunctionPass(ID) {
+ initializeRewriteAsmDirectivesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+};
+
+class AsmDirectivesVisitor : public InstVisitor<AsmDirectivesVisitor> {
+public:
+ AsmDirectivesVisitor(Function &F)
+ : F(F), C(F.getParent()->getContext()), ModifiedFunction(false) {}
+ ~AsmDirectivesVisitor() {}
+ bool modifiedFunction() const { return ModifiedFunction; }
+
+ /// Only Call Instructions are ever inline assembly directives.
+ void visitCallInst(CallInst &CI);
+
+private:
+ Function &F;
+ LLVMContext &C;
+ bool ModifiedFunction;
+
+ AsmDirectivesVisitor() LLVM_DELETED_FUNCTION;
+ AsmDirectivesVisitor(const AsmDirectivesVisitor &) LLVM_DELETED_FUNCTION;
+ AsmDirectivesVisitor &operator=(const AsmDirectivesVisitor &) LLVM_DELETED_FUNCTION;
+};
+}
+
+char RewriteAsmDirectives::ID = 0;
+INITIALIZE_PASS(
+ RewriteAsmDirectives, "rewrite-asm-directives",
+ "rewrite portable inline assembly directives into non-asm LLVM IR",
+ false, false)
+
+bool RewriteAsmDirectives::runOnFunction(Function &F) {
+ AsmDirectivesVisitor AV(F);
+ AV.visit(F);
+ return AV.modifiedFunction();
+}
+
+void AsmDirectivesVisitor::visitCallInst(CallInst &CI) {
+ if (!CI.isInlineAsm())
+ return;
+
+ Instruction *Replacement = NULL;
+
+ InlineAsm *IA = cast<InlineAsm>(CI.getCalledValue());
+ std::string AsmStr(IA->getAsmString());
+ std::string ConstraintStr(IA->getConstraintString());
+ Type *T = CI.getType();
+
+ bool isEmptyAsm = AsmStr.empty();
+ // Different triples will encode "touch everything" differently, e.g.:
+ // - le32-unknown-nacl has "~{memory}".
+ // - x86 "~{memory},~{dirflag},~{fpsr},~{flags}".
+ // The following code therefore only searches for memory: this pass
+ // deals with portable assembly, touching anything else than memory in
+ // an empty assembly statement is meaningless.
+ bool touchesMemory = ConstraintStr.find("~{memory}") != std::string::npos;
+
+ if (T->isVoidTy() && IA->hasSideEffects() && isEmptyAsm && touchesMemory) {
+ // asm("":::"memory") => fence seq_cst
+ // This transformation is safe and strictly stronger: the former is
+ // purely a compiler fence, whereas the latter is a compiler fence
+ // as well as a hardware fence which orders all loads and stores on
+ // the current thread of execution.
+ Replacement = new FenceInst(C, SequentiallyConsistent, CrossThread, &CI);
+ }
+
+ if (Replacement) {
+ Replacement->setDebugLoc(CI.getDebugLoc());
+ CI.replaceAllUsesWith(Replacement);
+ CI.eraseFromParent();
+ ModifiedFunction = true;
+ }
+}
+
+namespace llvm {
+FunctionPass *createRewriteAsmDirectivesPass() {
+ return new RewriteAsmDirectives();
+}
+}
diff --git a/test/Transforms/NaCl/rewrite-asm-memory.ll b/test/Transforms/NaCl/rewrite-asm-memory.ll
new file mode 100644
index 0000000000..8481c831ee
--- /dev/null
+++ b/test/Transforms/NaCl/rewrite-asm-memory.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -rewrite-asm-directives -S | FileCheck %s
+; RUN: opt < %s -O3 -rewrite-asm-directives -S | FileCheck %s
+; RUN: opt < %s -O3 -rewrite-asm-directives -S | FileCheck %s -check-prefix=ELIM
+; RUN: opt < %s -rewrite-asm-directives -S | FileCheck %s -check-prefix=CLEANED
+
+; Test that asm("":::"memory"), a compiler barrier, gets rewritten to a
+; sequentially-consistent fence. The test is also run at O3 to make sure
+; that loads and stores don't get unexpectedly eliminated.
+
+; CLEANED-NOT: asm
+
+@a = external global i32
+@b = external global i32
+
+; Different triples encode "touch everything" constraints differently.
+define void @memory_assembly_encoding_test() {
+; CHECK: @memory_assembly_encoding_test()
+ call void asm sideeffect "", "~{memory}"()
+ call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
+ ; CHECK-NEXT: fence seq_cst
+ ; CHECK-NEXT: fence seq_cst
+
+ ret void
+ ; CHECK-NEXT: ret void
+}
+
+define void @memory_assembly_ordering_test() {
+; CHECK: @memory_assembly_ordering_test()
+ %1 = load i32* @a, align 4
+ store i32 %1, i32* @b, align 4
+ call void asm sideeffect "", "~{memory}"()
+ ; CHECK-NEXT: %1 = load i32* @a, align 4
+ ; CHECK-NEXT: store i32 %1, i32* @b, align 4
+ ; CHECK-NEXT: fence seq_cst
+
+ ; Redundant load from the previous location, and store to the same
+ ; location (making the previous one dead). Shouldn't get eliminated
+ ; because of the fence.
+ %2 = load i32* @a, align 4
+ store i32 %2, i32* @b, align 4
+ call void asm sideeffect "", "~{memory}"()
+ ; CHECK-NEXT: %2 = load i32* @a, align 4
+ ; CHECK-NEXT: store i32 %2, i32* @b, align 4
+ ; CHECK-NEXT: fence seq_cst
+
+ ; Same here.
+ %3 = load i32* @a, align 4
+ store i32 %3, i32* @b, align 4
+ ; CHECK-NEXT: %3 = load i32* @a, align 4
+ ; CHECK-NEXT: store i32 %3, i32* @b, align 4
+
+ ret void
+ ; CHECK-NEXT: ret void
+}
+
+; Same function as above, but without the barriers. At O3 some loads and
+; stores should get eliminated.
+define void @memory_ordering_test() {
+; ELIM: @memory_ordering_test()
+ %1 = load i32* @a, align 4
+ store i32 %1, i32* @b, align 4
+ %2 = load i32* @a, align 4
+ store i32 %2, i32* @b, align 4
+ %3 = load i32* @a, align 4
+ store i32 %3, i32* @b, align 4
+ ; ELIM-NEXT: %1 = load i32* @a, align 4
+ ; ELIM-NEXT: store i32 %1, i32* @b, align 4
+
+ ret void
+ ; ELIM-NEXT: ret void
+}
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index a9c19a33dc..5682a55909 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -626,6 +626,7 @@ int main(int argc, char **argv) {
initializeReplacePtrsWithIntsPass(Registry);
initializeResolveAliasesPass(Registry);
initializeResolvePNaClIntrinsicsPass(Registry);
+ initializeRewriteAsmDirectivesPass(Registry);
initializeRewriteAtomicsPass(Registry);
initializeRewriteLLVMIntrinsicsPass(Registry);
initializeRewritePNaClLibraryCallsPass(Registry);