 lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp                              | 196 ++++
 test/Transforms/NaCl/resolve-pnacl-intrinsics-x86-32-16-bit-atomics-hack.ll | 138 ++++
 2 files changed, 334 insertions(+), 0 deletions(-)
diff --git a/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
index fc5138574d..3550cd9aca 100644
--- a/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
+++ b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
@@ -19,12 +19,14 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NaClAtomicIntrinsics.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
@@ -224,11 +226,27 @@ private:
thawMemoryOrder(Call->getArgOperand(2)), SS, Call);
break;
case Intrinsic::nacl_atomic_rmw:
+ if (needsX8632HackFor16BitAtomics(cast<PointerType>(
+ Call->getArgOperand(1)->getType())->getElementType())) {
+ // TODO(jfb) Remove this hack. See below.
+ atomic16BitX8632Hack(Call, false, Call->getArgOperand(1),
+ Call->getArgOperand(2), Call->getArgOperand(0),
+ NULL);
+ return true;
+ }
I = new AtomicRMWInst(thawRMWOperation(Call->getArgOperand(0)),
Call->getArgOperand(1), Call->getArgOperand(2),
thawMemoryOrder(Call->getArgOperand(3)), SS, Call);
break;
case Intrinsic::nacl_atomic_cmpxchg:
+ if (needsX8632HackFor16BitAtomics(cast<PointerType>(
+ Call->getArgOperand(0)->getType())->getElementType())) {
+ // TODO(jfb) Remove this hack. See below.
+ atomic16BitX8632Hack(Call, true, Call->getArgOperand(0),
+ Call->getArgOperand(2), NULL,
+ Call->getArgOperand(1));
+ return true;
+ }
// TODO LLVM currently doesn't support specifying separate memory
// orders for compare exchange's success and failure cases:
// LLVM IR implicitly drops the Release part of the specified
@@ -304,6 +322,184 @@ private:
}
}
+ // TODO(jfb) Remove the following hacks once NaCl's x86-32 validator
+ // supports 16-bit atomic intrinsics. See:
+ // https://code.google.com/p/nativeclient/issues/detail?id=3579
+ // https://code.google.com/p/nativeclient/issues/detail?id=2981
+ // ===========================================================================
+ bool needsX8632HackFor16BitAtomics(Type *OverloadedType) const {
+ return Triple(M->getTargetTriple()).getArch() == Triple::x86 &&
+ OverloadedType == Type::getInt16Ty(M->getContext());
+ }
+
+ /// Expand the 16-bit intrinsic into an equivalent 32-bit
+ /// compare-exchange loop.
+ void atomic16BitX8632Hack(IntrinsicInst *Call, bool IsCmpXChg,
+ Value *Ptr16, Value *RHS, Value *RMWOp,
+ Value *CmpXChgOldVal) const {
+ assert((IsCmpXChg ? CmpXChgOldVal : RMWOp) &&
+ "cmpxchg expects an old value, whereas RMW expects an operation");
+ Type *I16 = Type::getInt16Ty(M->getContext());
+ Type *I32 = Type::getInt32Ty(M->getContext());
+ Type *I32Ptr = Type::getInt32PtrTy(M->getContext());
+
+ // Precede this with a compiler fence.
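+ // An empty asm string with only a "~{memory}" clobber emits no machine
+ // instruction; it just prevents the compiler from reordering memory
+ // accesses across this point.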
+ FunctionType *FTy =
+ FunctionType::get(Type::getVoidTy(M->getContext()), false);
+ std::string AsmString; // Empty.
+ std::string Constraints("~{memory}");
+ bool HasSideEffect = true;
+ CallInst::Create(InlineAsm::get(
+ FTy, AsmString, Constraints, HasSideEffect), "", Call);
+
+ BasicBlock *CurrentBB = Call->getParent();
+ IRBuilder<> IRB(CurrentBB, Call);
+ BasicBlock *Aligned32BB =
+ BasicBlock::Create(IRB.getContext(), "atomic16aligned32",
+ CurrentBB->getParent());
+ BasicBlock *Aligned16BB =
+ BasicBlock::Create(IRB.getContext(), "atomic16aligned16",
+ CurrentBB->getParent());
+
+ // Setup.
+ // Align the 16-bit pointer down to a 32-bit boundary, and figure out
+ // whether the 16-bit operation is to be carried out on the top or
+ // bottom half of the 32-bit aligned value.
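+ // For example (illustrative addresses): a 16-bit pointer whose integer
+ // value is 0x1000 masks to 0x1000 with remainder 0, so on little-endian
+ // x86 the value lives in the bottom half of the 32-bit word; a pointer
+ // value of 0x1002 has remainder 2 and lives in the top half.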
+ Value *IPtr = IRB.CreatePtrToInt(Ptr16, I32, "uintptr");
+ Value *IPtrAlign = IRB.CreateAnd(IPtr, IRB.getInt32(~3u), "aligneduintptr");
+ Value *Aligned32 = IRB.CreateAnd(IPtr, IRB.getInt32(3u), "aligned32");
+ Value *Ptr32 = IRB.CreateIntToPtr(IPtrAlign, I32Ptr, "ptr32");
+ Value *IsAligned32 = IRB.CreateICmpEQ(Aligned32, IRB.getInt32(0),
+ "isaligned32");
+ IRB.CreateCondBr(IsAligned32, Aligned32BB, Aligned16BB);
+
+ // Create a diamond after the setup. The rest of the basic block
+ // that the Call was in is separated into the successor block.
+ BasicBlock *Successor =
+ CurrentBB->splitBasicBlock(IRB.GetInsertPoint(), "atomic16successor");
+ // Remove the extra unconditional branch that the split added.
+ CurrentBB->getTerminator()->eraseFromParent();
+
+ // Aligned 32 block.
+ // The 16-bit value was aligned to 32 bits:
+ // - Atomically load the full 32-bit value.
+ // - Get the 16-bit value from its bottom.
+ // - Perform the 16-bit operation.
+ // - Zero-extend the result and merge it back with the top half of
+ // the loaded value.
+ // - Try to compare-exchange this new 32-bit result. This will
+ // succeed if the value at the 32-bit location is still what was
+ // just loaded. If not, try the entire thing again.
+ // - Return the 16-bit value before the operation was performed.
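+ // Worked example (illustrative values): for an Add of 1 where the
+ // 32-bit load sees 0xAAAA0005, the bottom half 0x0005 becomes 0x0006,
+ // the merged value is 0xAAAA0006, and the cmpxchg succeeds only if the
+ // word is still 0xAAAA0005; the original 0x0005 is returned.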
+ Value *Ret32;
+ {
+ IRB.SetInsertPoint(Aligned32BB);
+ LoadInst *Loaded = IRB.CreateAlignedLoad(Ptr32, 4, "loaded");
+ Loaded->setAtomic(SequentiallyConsistent);
+ Value *TruncVal = IRB.CreateTrunc(Loaded, I16, "truncval");
+ Ret32 = TruncVal;
+ Value *Res;
+ if (IsCmpXChg) {
+ Res = RHS;
+ } else {
+ switch (thawRMWOperation(RMWOp)) {
+ default: llvm_unreachable("unknown atomic RMW operation");
+ case AtomicRMWInst::Add:
+ Res = IRB.CreateAdd(TruncVal, RHS, "res"); break;
+ case AtomicRMWInst::Sub:
+ Res = IRB.CreateSub(TruncVal, RHS, "res"); break;
+ case AtomicRMWInst::Or:
+ Res = IRB.CreateOr(TruncVal, RHS, "res"); break;
+ case AtomicRMWInst::And:
+ Res = IRB.CreateAnd(TruncVal, RHS, "res"); break;
+ case AtomicRMWInst::Xor:
+ Res = IRB.CreateXor(TruncVal, RHS, "res"); break;
+ case AtomicRMWInst::Xchg:
+ Res = RHS; break;
+ }
+ }
+ Value *MergeRes = IRB.CreateZExt(Res, I32, "mergeres");
+ Value *MaskedLoaded = IRB.CreateAnd(Loaded, IRB.getInt32(0xFFFF0000u),
+ "maskedloaded");
+ Value *FinalRes = IRB.CreateOr(MergeRes, MaskedLoaded, "finalres");
+ Value *Expected = IsCmpXChg ?
+ IRB.CreateOr(MaskedLoaded, IRB.CreateZExt(CmpXChgOldVal, I32, "zext"),
+ "expected") :
+ Loaded;
+ Value *OldVal = IRB.CreateAtomicCmpXchg(Ptr32, Expected, FinalRes,
+ SequentiallyConsistent);
+ OldVal->setName("oldval");
+ // Test that the entire 32-bit value didn't change during the operation.
+ Value *Success = IRB.CreateICmpEQ(OldVal, Loaded, "success");
+ IRB.CreateCondBr(Success, Successor, Aligned32BB);
+ }
+
+ // Aligned 16 block.
+ // Similar to the above aligned 32 block, but the 16-bit value is in
+ // the top half of the 32-bit value. It needs to be shifted down,
+ // and shifted back up before being merged in.
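+ // Worked example (illustrative values): for an Add of 1 where the
+ // 32-bit load sees 0x0005AAAA, the top half is shifted down to 0x0005,
+ // becomes 0x0006, is shifted back up and merged to 0x0006AAAA, and the
+ // cmpxchg succeeds only if the word is still 0x0005AAAA; 0x0005 is
+ // returned.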
+ Value *Ret16;
+ {
+ IRB.SetInsertPoint(Aligned16BB);
+ LoadInst *Loaded = IRB.CreateAlignedLoad(Ptr32, 4, "loaded");
+ Loaded->setAtomic(SequentiallyConsistent);
+ Value *ShVal = IRB.CreateTrunc(IRB.CreateLShr(Loaded, 16, "lshr"), I16,
+ "shval");
+ Ret16 = ShVal;
+ Value *Res;
+ if (IsCmpXChg) {
+ Res = RHS;
+ } else {
+ switch (thawRMWOperation(RMWOp)) {
+ default: llvm_unreachable("unknown atomic RMW operation");
+ case AtomicRMWInst::Add:
+ Res = IRB.CreateAdd(ShVal, RHS, "res"); break;
+ case AtomicRMWInst::Sub:
+ Res = IRB.CreateSub(ShVal, RHS, "res"); break;
+ case AtomicRMWInst::Or:
+ Res = IRB.CreateOr(ShVal, RHS, "res"); break;
+ case AtomicRMWInst::And:
+ Res = IRB.CreateAnd(ShVal, RHS, "res"); break;
+ case AtomicRMWInst::Xor:
+ Res = IRB.CreateXor(ShVal, RHS, "res"); break;
+ case AtomicRMWInst::Xchg:
+ Res = RHS; break;
+ }
+ }
+ Value *MergeRes = IRB.CreateShl(IRB.CreateZExt(Res, I32, "zext"), 16,
+ "mergeres");
+ Value *MaskedLoaded = IRB.CreateAnd(Loaded, IRB.getInt32(0xFFFF),
+ "maskedloaded");
+ Value *FinalRes = IRB.CreateOr(MergeRes, MaskedLoaded, "finalres");
+ Value *Expected = IsCmpXChg ?
+ IRB.CreateOr(MaskedLoaded, IRB.CreateShl(
+ IRB.CreateZExt(CmpXChgOldVal, I32, "zext"), 16, "shl"),
+ "expected") :
+ Loaded;
+ Value *OldVal = IRB.CreateAtomicCmpXchg(Ptr32, Expected, FinalRes,
+ SequentiallyConsistent);
+ OldVal->setName("oldval");
+ // Test that the entire 32-bit value didn't change during the operation.
+ Value *Success = IRB.CreateICmpEQ(OldVal, Loaded, "success");
+ IRB.CreateCondBr(Success, Successor, Aligned16BB);
+ }
+
+ // Merge the value, and remove the original intrinsic Call.
+ IRB.SetInsertPoint(Successor->getFirstInsertionPt());
+ PHINode *PHI = IRB.CreatePHI(I16, 2);
+ PHI->addIncoming(Ret32, Aligned32BB);
+ PHI->addIncoming(Ret16, Aligned16BB);
+ Call->replaceAllUsesWith(PHI);
+ Call->eraseFromParent();
+
+ // Finish everything with another compiler fence.
+ CallInst::Create(InlineAsm::get(
+ FTy, AsmString, Constraints, HasSideEffect), "",
+ Successor->getFirstInsertionPt());
+ }
+ // ===========================================================================
+ // End hacks.
+
AtomicCallResolver(const AtomicCallResolver &);
AtomicCallResolver &operator=(const AtomicCallResolver &);
};
diff --git a/test/Transforms/NaCl/resolve-pnacl-intrinsics-x86-32-16-bit-atomics-hack.ll b/test/Transforms/NaCl/resolve-pnacl-intrinsics-x86-32-16-bit-atomics-hack.ll
new file mode 100644
index 0000000000..fc0e7c70f6
--- /dev/null
+++ b/test/Transforms/NaCl/resolve-pnacl-intrinsics-x86-32-16-bit-atomics-hack.ll
@@ -0,0 +1,138 @@
+; RUN: opt < %s -resolve-pnacl-intrinsics -S -mtriple=i386-unknown-nacl | \
+; RUN: FileCheck %s -check-prefix=CLEANED
+; RUN: opt < %s -resolve-pnacl-intrinsics -S -mtriple=i386-unknown-nacl | \
+; RUN: FileCheck %s
+
+; CLEANED-NOT: call {{.*}} @llvm.nacl.atomic
+
+; Supplement to resolve-pnacl-intrinsics.ll that tests the 16-bit hack
+; for x86-32. All of the RMW cases are identical except for the
+; operation performed.
+
+; These declarations must be here because the function pass expects
+; to find them. In real life they're inserted by the translator
+; before the function pass runs.
+declare i32 @setjmp(i8*)
+declare void @longjmp(i8*, i32)
+
+declare i16 @llvm.nacl.atomic.rmw.i16(i32, i16*, i16, i32)
+declare i16 @llvm.nacl.atomic.cmpxchg.i16(i16*, i16, i16, i32, i32)
+
+; CHECK: @test_fetch_and_add_i16
+define i16 @test_fetch_and_add_i16(i16* %ptr, i16 %value) {
+; CHECK-NEXT: call void asm sideeffect "", "~{memory}"()
+; CHECK-NEXT: %uintptr = ptrtoint i16* %ptr to i32
+; CHECK-NEXT: %aligneduintptr = and i32 %uintptr, -4
+; CHECK-NEXT: %aligned32 = and i32 %uintptr, 3
+; CHECK-NEXT: %ptr32 = inttoptr i32 %aligneduintptr to i32*
+; CHECK-NEXT: %isaligned32 = icmp eq i32 %aligned32, 0
+; CHECK-NEXT: br i1 %isaligned32, label %atomic16aligned32, label %atomic16aligned16
+;
+; CHECK: atomic16successor:
+; CHECK-NEXT: %1 = phi i16 [ %truncval, %atomic16aligned32 ], [ %shval, %atomic16aligned16 ]
+; CHECK-NEXT: call void asm sideeffect "", "~{memory}"()
+; CHECK-NEXT: ret i16 %1
+;
+; CHECK: atomic16aligned32:
+; CHECK-NEXT: %loaded = load atomic i32* %ptr32 seq_cst, align 4
+; CHECK-NEXT: %truncval = trunc i32 %loaded to i16
+; CHECK-NEXT: %res = add i16 %truncval, %value
+; CHECK-NEXT: %mergeres = zext i16 %res to i32
+; CHECK-NEXT: %maskedloaded = and i32 %loaded, -65536
+; CHECK-NEXT: %finalres = or i32 %mergeres, %maskedloaded
+; CHECK-NEXT: %oldval = cmpxchg i32* %ptr32, i32 %loaded, i32 %finalres seq_cst
+; CHECK-NEXT: %success = icmp eq i32 %oldval, %loaded
+; CHECK-NEXT: br i1 %success, label %atomic16successor, label %atomic16aligned32
+;
+; CHECK: atomic16aligned16:
+; CHECK-NEXT: %loaded1 = load atomic i32* %ptr32 seq_cst, align 4
+; CHECK-NEXT: %lshr = lshr i32 %loaded1, 16
+; CHECK-NEXT: %shval = trunc i32 %lshr to i16
+; CHECK-NEXT: %res2 = add i16 %shval, %value
+; CHECK-NEXT: %zext = zext i16 %res2 to i32
+; CHECK-NEXT: %mergeres3 = shl i32 %zext, 16
+; CHECK-NEXT: %maskedloaded4 = and i32 %loaded1, 65535
+; CHECK-NEXT: %finalres5 = or i32 %mergeres3, %maskedloaded4
+; CHECK-NEXT: %oldval6 = cmpxchg i32* %ptr32, i32 %loaded1, i32 %finalres5 seq_cst
+; CHECK-NEXT: %success7 = icmp eq i32 %oldval6, %loaded1
+; CHECK-NEXT: br i1 %success7, label %atomic16successor, label %atomic16aligned16
+ %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i16* %ptr, i16 %value, i32 6)
+ ret i16 %1
+}
+
+; CHECK: @test_fetch_and_sub_i16
+define i16 @test_fetch_and_sub_i16(i16* %ptr, i16 %value) {
+ ; CHECK: %res = sub i16 %truncval, %value
+ ; CHECK: %res2 = sub i16 %shval, %value
+ %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 2, i16* %ptr, i16 %value, i32 6)
+ ret i16 %1
+}
+
+; CHECK: @test_fetch_and_or_i16
+define i16 @test_fetch_and_or_i16(i16* %ptr, i16 %value) {
+ ; CHECK: %res = or i16 %truncval, %value
+ ; CHECK: %res2 = or i16 %shval, %value
+ %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %value, i32 6)
+ ret i16 %1
+}
+
+; CHECK: @test_fetch_and_and_i16
+define i16 @test_fetch_and_and_i16(i16* %ptr, i16 %value) {
+ ; CHECK: %res = and i16 %truncval, %value
+ ; CHECK: %res2 = and i16 %shval, %value
+ %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 4, i16* %ptr, i16 %value, i32 6)
+ ret i16 %1
+}
+
+; CHECK: @test_fetch_and_xor_i16
+define i16 @test_fetch_and_xor_i16(i16* %ptr, i16 %value) {
+ ; CHECK: %res = xor i16 %truncval, %value
+ ; CHECK: %res2 = xor i16 %shval, %value
+ %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 5, i16* %ptr, i16 %value, i32 6)
+ ret i16 %1
+}
+
+; CHECK: @test_val_compare_and_swap_i16
+define i16 @test_val_compare_and_swap_i16(i16* %ptr, i16 %oldval, i16 %newval) {
+; CHECK-NEXT: call void asm sideeffect "", "~{memory}"()
+; CHECK-NEXT: %uintptr = ptrtoint i16* %ptr to i32
+; CHECK-NEXT: %aligneduintptr = and i32 %uintptr, -4
+; CHECK-NEXT: %aligned32 = and i32 %uintptr, 3
+; CHECK-NEXT: %ptr32 = inttoptr i32 %aligneduintptr to i32*
+; CHECK-NEXT: %isaligned32 = icmp eq i32 %aligned32, 0
+; CHECK-NEXT: br i1 %isaligned32, label %atomic16aligned32, label %atomic16aligned16
+;
+; CHECK: atomic16successor:
+; CHECK-NEXT: %1 = phi i16 [ %truncval, %atomic16aligned32 ], [ %shval, %atomic16aligned16 ]
+; CHECK-NEXT: call void asm sideeffect "", "~{memory}"()
+; CHECK-NEXT: ret i16 %1
+;
+; CHECK: atomic16aligned32:
+; CHECK-NEXT: %loaded = load atomic i32* %ptr32 seq_cst, align 4
+; CHECK-NEXT: %truncval = trunc i32 %loaded to i16
+; CHECK-NEXT: %mergeres = zext i16 %newval to i32
+; CHECK-NEXT: %maskedloaded = and i32 %loaded, -65536
+; CHECK-NEXT: %finalres = or i32 %mergeres, %maskedloaded
+; CHECK-NEXT: %zext = zext i16 %oldval to i32
+; CHECK-NEXT: %expected = or i32 %maskedloaded, %zext
+; CHECK-NEXT: %oldval1 = cmpxchg i32* %ptr32, i32 %expected, i32 %finalres seq_cst
+; CHECK-NEXT: %success = icmp eq i32 %oldval1, %loaded
+; CHECK-NEXT: br i1 %success, label %atomic16successor, label %atomic16aligned32
+;
+; CHECK: atomic16aligned16:
+; CHECK-NEXT: %loaded2 = load atomic i32* %ptr32 seq_cst, align 4
+; CHECK-NEXT: %lshr = lshr i32 %loaded2, 16
+; CHECK-NEXT: %shval = trunc i32 %lshr to i16
+; CHECK-NEXT: %zext3 = zext i16 %newval to i32
+; CHECK-NEXT: %mergeres4 = shl i32 %zext3, 16
+; CHECK-NEXT: %maskedloaded5 = and i32 %loaded2, 65535
+; CHECK-NEXT: %finalres6 = or i32 %mergeres4, %maskedloaded5
+; CHECK-NEXT: %zext7 = zext i16 %oldval to i32
+; CHECK-NEXT: %shl = shl i32 %zext7, 16
+; CHECK-NEXT: %expected8 = or i32 %maskedloaded5, %shl
+; CHECK-NEXT: %oldval9 = cmpxchg i32* %ptr32, i32 %expected8, i32 %finalres6 seq_cst
+; CHECK-NEXT: %success10 = icmp eq i32 %oldval9, %loaded2
+; CHECK-NEXT: br i1 %success10, label %atomic16successor, label %atomic16aligned16
+ %1 = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %oldval, i16 %newval, i32 6, i32 6)
+ ret i16 %1
+}