-rw-r--r--   lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp                               | 196
-rw-r--r--   test/Transforms/NaCl/resolve-pnacl-intrinsics-x86-32-16-bit-atomics-hack.ll  | 138
2 files changed, 334 insertions, 0 deletions
diff --git a/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
index fc5138574d..3550cd9aca 100644
--- a/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
+++ b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
@@ -19,12 +19,14 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/NaClAtomicIntrinsics.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
@@ -224,11 +226,27 @@ private:
                        thawMemoryOrder(Call->getArgOperand(2)), SS, Call);
       break;
     case Intrinsic::nacl_atomic_rmw:
+      if (needsX8632HackFor16BitAtomics(cast<PointerType>(
+              Call->getArgOperand(1)->getType())->getElementType())) {
+        // TODO(jfb) Remove this hack. See below.
+        atomic16BitX8632Hack(Call, false, Call->getArgOperand(1),
+                             Call->getArgOperand(2), Call->getArgOperand(0),
+                             NULL);
+        return true;
+      }
       I = new AtomicRMWInst(thawRMWOperation(Call->getArgOperand(0)),
                             Call->getArgOperand(1), Call->getArgOperand(2),
                             thawMemoryOrder(Call->getArgOperand(3)), SS, Call);
       break;
     case Intrinsic::nacl_atomic_cmpxchg:
+      if (needsX8632HackFor16BitAtomics(cast<PointerType>(
+              Call->getArgOperand(0)->getType())->getElementType())) {
+        // TODO(jfb) Remove this hack. See below.
+        atomic16BitX8632Hack(Call, true, Call->getArgOperand(0),
+                             Call->getArgOperand(2), NULL,
+                             Call->getArgOperand(1));
+        return true;
+      }
       // TODO LLVM currently doesn't support specifying separate memory
       // orders for compare exchange's success and failure cases:
       // LLVM IR implicitly drops the Release part of the specified
@@ -304,6 +322,184 @@ private:
     }
   }
 
+  // TODO(jfb) Remove the following hacks once NaCl's x86-32 validator
+  // supports 16-bit atomic intrisics. See:
+  //   https://code.google.com/p/nativeclient/issues/detail?id=3579
+  //   https://code.google.com/p/nativeclient/issues/detail?id=2981
+  // ===========================================================================
+  bool needsX8632HackFor16BitAtomics(Type *OverloadedType) const {
+    return Triple(M->getTargetTriple()).getArch() == Triple::x86 &&
+           OverloadedType == Type::getInt16Ty(M->getContext());
+  }
+
+  /// Expand the 16-bit Intrinsic into an equivalent 32-bit
+  /// compare-exchange loop.
+  void atomic16BitX8632Hack(IntrinsicInst *Call, bool IsCmpXChg,
+                            Value *Ptr16, Value *RHS, Value *RMWOp,
+                            Value *CmpXChgOldVal) const {
+    assert((IsCmpXChg ? CmpXChgOldVal : RMWOp) &&
+           "cmpxchg expects an old value, whereas RMW expects an operation");
+    Type *I16 = Type::getInt16Ty(M->getContext());
+    Type *I32 = Type::getInt32Ty(M->getContext());
+    Type *I32Ptr = Type::getInt32PtrTy(M->getContext());
+
+    // Precede this with a compiler fence.
+    FunctionType *FTy =
+        FunctionType::get(Type::getVoidTy(M->getContext()), false);
+    std::string AsmString; // Empty.
+    std::string Constraints("~{memory}");
+    bool HasSideEffect = true;
+    CallInst::Create(InlineAsm::get(
+        FTy, AsmString, Constraints, HasSideEffect), "", Call);
+
+    BasicBlock *CurrentBB = Call->getParent();
+    IRBuilder<> IRB(CurrentBB, Call);
+    BasicBlock *Aligned32BB =
+        BasicBlock::Create(IRB.getContext(), "atomic16aligned32",
+                           CurrentBB->getParent());
+    BasicBlock *Aligned16BB =
+        BasicBlock::Create(IRB.getContext(), "atomic16aligned16",
+                           CurrentBB->getParent());
+
+    // Setup.
+    // Align the 16-bit pointer to 32-bits, and figure out if the 16-bit
+    // operation is to be carried on the top or bottom half of the
+    // 32-bit aligned value.
+    Value *IPtr = IRB.CreatePtrToInt(Ptr16, I32, "uintptr");
+    Value *IPtrAlign = IRB.CreateAnd(IPtr, IRB.getInt32(~3u), "aligneduintptr");
+    Value *Aligned32 = IRB.CreateAnd(IPtr, IRB.getInt32(3u), "aligned32");
+    Value *Ptr32 = IRB.CreateIntToPtr(IPtrAlign, I32Ptr, "ptr32");
+    Value *IsAligned32 = IRB.CreateICmpEQ(Aligned32, IRB.getInt32(0),
+                                          "isaligned32");
+    IRB.CreateCondBr(IsAligned32, Aligned32BB, Aligned16BB);
+
+    // Create a diamond after the setup. The rest of the basic block
+    // that the Call was in is separated into the successor block.
+    BasicBlock *Successor =
+        CurrentBB->splitBasicBlock(IRB.GetInsertPoint(), "atomic16successor");
+    // Remove the extra unconditional branch that the split added.
+    CurrentBB->getTerminator()->eraseFromParent();
+
+    // Aligned 32 block.
+    // The 16-bit value was aligned to 32-bits:
+    //  - Atomically load the full 32-bit value.
+    //  - Get the 16-bit value from its bottom.
+    //  - Perform the 16-bit operation.
+    //  - Truncate and merge the result back with the top half of the
+    //    loaded value.
+    //  - Try to compare-exchange this new 32-bit result. This will
+    //    succeed if the value at the 32-bit location is still what was
+    //    just loaded. If not, try the entire thing again.
+    //  - Return the 16-bit value before the operation was performed.
+    Value *Ret32;
+    {
+      IRB.SetInsertPoint(Aligned32BB);
+      LoadInst *Loaded = IRB.CreateAlignedLoad(Ptr32, 4, "loaded");
+      Loaded->setAtomic(SequentiallyConsistent);
+      Value *TruncVal = IRB.CreateTrunc(Loaded, I16, "truncval");
+      Ret32 = TruncVal;
+      Value *Res;
+      if (IsCmpXChg) {
+        Res = RHS;
+      } else {
+        switch (thawRMWOperation(RMWOp)) {
+        default: llvm_unreachable("unknown atomic RMW operation");
+        case AtomicRMWInst::Add:
+          Res = IRB.CreateAdd(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Sub:
+          Res = IRB.CreateSub(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Or:
+          Res = IRB.CreateOr(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::And:
+          Res = IRB.CreateAnd(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Xor:
+          Res = IRB.CreateXor(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Xchg:
+          Res = RHS; break;
+        }
+      }
+      Value *MergeRes = IRB.CreateZExt(Res, I32, "mergeres");
+      Value *MaskedLoaded = IRB.CreateAnd(Loaded, IRB.getInt32(0xFFFF0000u),
+                                          "maskedloaded");
+      Value *FinalRes = IRB.CreateOr(MergeRes, MaskedLoaded, "finalres");
+      Value *Expected = IsCmpXChg ?
+          IRB.CreateOr(MaskedLoaded, IRB.CreateZExt(CmpXChgOldVal, I32, "zext"),
+                       "expected") :
+          Loaded;
+      Value *OldVal = IRB.CreateAtomicCmpXchg(Ptr32, Expected, FinalRes,
+                                              SequentiallyConsistent);
+      OldVal->setName("oldval");
+      // Test that the entire 32-bit value didn't change during the operation.
+      Value *Success = IRB.CreateICmpEQ(OldVal, Loaded, "success");
+      IRB.CreateCondBr(Success, Successor, Aligned32BB);
+    }
+
+    // Aligned 16 block.
+    // Similar to the above aligned 32 block, but the 16-bit value is in
+    // the top half of the 32-bit value. It needs to be shifted down,
+    // and shifted back up before being merged in.
+    Value *Ret16;
+    {
+      IRB.SetInsertPoint(Aligned16BB);
+      LoadInst *Loaded = IRB.CreateAlignedLoad(Ptr32, 4, "loaded");
+      Loaded->setAtomic(SequentiallyConsistent);
+      Value *ShVal = IRB.CreateTrunc(IRB.CreateLShr(Loaded, 16, "lshr"), I16,
+                                     "shval");
+      Ret16 = ShVal;
+      Value *Res;
+      if (IsCmpXChg) {
+        Res = RHS;
+      } else {
+        switch (thawRMWOperation(RMWOp)) {
+        default: llvm_unreachable("unknown atomic RMW operation");
+        case AtomicRMWInst::Add:
+          Res = IRB.CreateAdd(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Sub:
+          Res = IRB.CreateSub(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Or:
+          Res = IRB.CreateOr(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::And:
+          Res = IRB.CreateAnd(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Xor:
+          Res = IRB.CreateXor(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Xchg:
+          Res = RHS; break;
+        }
+      }
+      Value *MergeRes = IRB.CreateShl(IRB.CreateZExt(Res, I32, "zext"), 16,
+                                      "mergeres");
+      Value *MaskedLoaded = IRB.CreateAnd(Loaded, IRB.getInt32(0xFFFF),
+                                          "maskedloaded");
+      Value *FinalRes = IRB.CreateOr(MergeRes, MaskedLoaded, "finalres");
+      Value *Expected = IsCmpXChg ?
+          IRB.CreateOr(MaskedLoaded, IRB.CreateShl(
+              IRB.CreateZExt(CmpXChgOldVal, I32, "zext"), 16, "shl"),
+              "expected") :
+          Loaded;
+      Value *OldVal = IRB.CreateAtomicCmpXchg(Ptr32, Expected, FinalRes,
+                                              SequentiallyConsistent);
+      OldVal->setName("oldval");
+      // Test that the entire 32-bit value didn't change during the operation.
+      Value *Success = IRB.CreateICmpEQ(OldVal, Loaded, "success");
+      IRB.CreateCondBr(Success, Successor, Aligned16BB);
+    }
+
+    // Merge the value, and remove the original intrinsic Call.
+    IRB.SetInsertPoint(Successor->getFirstInsertionPt());
+    PHINode *PHI = IRB.CreatePHI(I16, 2);
+    PHI->addIncoming(Ret32, Aligned32BB);
+    PHI->addIncoming(Ret16, Aligned16BB);
+    Call->replaceAllUsesWith(PHI);
+    Call->eraseFromParent();
+
+    // Finish everything with another compiler fence.
+    CallInst::Create(InlineAsm::get(
+        FTy, AsmString, Constraints, HasSideEffect), "",
+        Successor->getFirstInsertionPt());
+  }
+  // ===========================================================================
+  // End hacks.
+
   AtomicCallResolver(const AtomicCallResolver &);
   AtomicCallResolver &operator=(const AtomicCallResolver &);
 };
diff --git a/test/Transforms/NaCl/resolve-pnacl-intrinsics-x86-32-16-bit-atomics-hack.ll b/test/Transforms/NaCl/resolve-pnacl-intrinsics-x86-32-16-bit-atomics-hack.ll
new file mode 100644
index 0000000000..fc0e7c70f6
--- /dev/null
+++ b/test/Transforms/NaCl/resolve-pnacl-intrinsics-x86-32-16-bit-atomics-hack.ll
@@ -0,0 +1,138 @@
+; RUN: opt < %s -resolve-pnacl-intrinsics -S -mtriple=i386-unknown-nacl | \
+; RUN:   FileCheck %s -check-prefix=CLEANED
+; RUN: opt < %s -resolve-pnacl-intrinsics -S -mtriple=i386-unknown-nacl | \
+; RUN:   FileCheck %s
+
+; CLEANED-NOT: call {{.*}} @llvm.nacl.atomic
+
+; Supplement to resolve-pnacl-intrinsics.ll that tests the 16-bit hack
+; for x86-32. All of the RMW cases are the same except for one
+; operation.
+
+; These declarations must be here because the function pass expects
+; to find them. In real life they're inserted by the translator
+; before the function pass runs.
+declare i32 @setjmp(i8*)
+declare void @longjmp(i8*, i32)
+
+declare i16 @llvm.nacl.atomic.rmw.i16(i32, i16*, i16, i32)
+declare i16 @llvm.nacl.atomic.cmpxchg.i16(i16*, i16, i16, i32, i32)
+
+; CHECK: @test_fetch_and_add_i16
+define i16 @test_fetch_and_add_i16(i16* %ptr, i16 %value) {
+; CHECK-NEXT: call void asm sideeffect "", "~{memory}"()
+; CHECK-NEXT: %uintptr = ptrtoint i16* %ptr to i32
+; CHECK-NEXT: %aligneduintptr = and i32 %uintptr, -4
+; CHECK-NEXT: %aligned32 = and i32 %uintptr, 3
+; CHECK-NEXT: %ptr32 = inttoptr i32 %aligneduintptr to i32*
+; CHECK-NEXT: %isaligned32 = icmp eq i32 %aligned32, 0
+; CHECK-NEXT: br i1 %isaligned32, label %atomic16aligned32, label %atomic16aligned16
+;
+; CHECK: atomic16successor:
+; CHECK-NEXT: %1 = phi i16 [ %truncval, %atomic16aligned32 ], [ %shval, %atomic16aligned16 ]
+; CHECK-NEXT: call void asm sideeffect "", "~{memory}"()
+; CHECK-NEXT: ret i16 %1
+;
+; CHECK: atomic16aligned32:
+; CHECK-NEXT: %loaded = load atomic i32* %ptr32 seq_cst, align 4
+; CHECK-NEXT: %truncval = trunc i32 %loaded to i16
+; CHECK-NEXT: %res = add i16 %truncval, %value
+; CHECK-NEXT: %mergeres = zext i16 %res to i32
+; CHECK-NEXT: %maskedloaded = and i32 %loaded, -65536
+; CHECK-NEXT: %finalres = or i32 %mergeres, %maskedloaded
+; CHECK-NEXT: %oldval = cmpxchg i32* %ptr32, i32 %loaded, i32 %finalres seq_cst
+; CHECK-NEXT: %success = icmp eq i32 %oldval, %loaded
+; CHECK-NEXT: br i1 %success, label %atomic16successor, label %atomic16aligned32
+;
+; CHECK: atomic16aligned16:
+; CHECK-NEXT: %loaded1 = load atomic i32* %ptr32 seq_cst, align 4
+; CHECK-NEXT: %lshr = lshr i32 %loaded1, 16
+; CHECK-NEXT: %shval = trunc i32 %lshr to i16
+; CHECK-NEXT: %res2 = add i16 %shval, %value
+; CHECK-NEXT: %zext = zext i16 %res2 to i32
+; CHECK-NEXT: %mergeres3 = shl i32 %zext, 16
+; CHECK-NEXT: %maskedloaded4 = and i32 %loaded1, 65535
+; CHECK-NEXT: %finalres5 = or i32 %mergeres3, %maskedloaded4
+; CHECK-NEXT: %oldval6 = cmpxchg i32* %ptr32, i32 %loaded1, i32 %finalres5 seq_cst
+; CHECK-NEXT: %success7 = icmp eq i32 %oldval6, %loaded1
+; CHECK-NEXT: br i1 %success7, label %atomic16successor, label %atomic16aligned16
+  %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i16* %ptr, i16 %value, i32 6)
+  ret i16 %1
+}
+
+; CHECK: @test_fetch_and_sub_i16
+define i16 @test_fetch_and_sub_i16(i16* %ptr, i16 %value) {
+  ; CHECK: %res = sub i16 %truncval, %value
+  ; CHECK: %res2 = sub i16 %shval, %value
+  %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 2, i16* %ptr, i16 %value, i32 6)
+  ret i16 %1
+}
+
+; CHECK: @test_fetch_and_or_i16
+define i16 @test_fetch_and_or_i16(i16* %ptr, i16 %value) {
+  ; CHECK: %res = or i16 %truncval, %value
+  ; CHECK: %res2 = or i16 %shval, %value
+  %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %value, i32 6)
+  ret i16 %1
+}
+
+; CHECK: @test_fetch_and_and_i16
+define i16 @test_fetch_and_and_i16(i16* %ptr, i16 %value) {
+  ; CHECK: %res = and i16 %truncval, %value
+  ; CHECK: %res2 = and i16 %shval, %value
+  %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 4, i16* %ptr, i16 %value, i32 6)
+  ret i16 %1
+}
+
+; CHECK: @test_fetch_and_xor_i16
+define i16 @test_fetch_and_xor_i16(i16* %ptr, i16 %value) {
+  ; CHECK: %res = xor i16 %truncval, %value
+  ; CHECK: %res2 = xor i16 %shval, %value
+  %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 5, i16* %ptr, i16 %value, i32 6)
+  ret i16 %1
+}
+
+; CHECK: @test_val_compare_and_swap_i16
+define i16 @test_val_compare_and_swap_i16(i16* %ptr, i16 %oldval, i16 %newval) {
+; CHECK-NEXT: call void asm sideeffect "", "~{memory}"()
+; CHECK-NEXT: %uintptr = ptrtoint i16* %ptr to i32
+; CHECK-NEXT: %aligneduintptr = and i32 %uintptr, -4
+; CHECK-NEXT: %aligned32 = and i32 %uintptr, 3
+; CHECK-NEXT: %ptr32 = inttoptr i32 %aligneduintptr to i32*
+; CHECK-NEXT: %isaligned32 = icmp eq i32 %aligned32, 0
+; CHECK-NEXT: br i1 %isaligned32, label %atomic16aligned32, label %atomic16aligned16
+;
+; CHECK: atomic16successor:
+; CHECK-NEXT: %1 = phi i16 [ %truncval, %atomic16aligned32 ], [ %shval, %atomic16aligned16 ]
+; CHECK-NEXT: call void asm sideeffect "", "~{memory}"()
+; CHECK-NEXT: ret i16 %1
+;
+; CHECK: atomic16aligned32:
+; CHECK-NEXT: %loaded = load atomic i32* %ptr32 seq_cst, align 4
+; CHECK-NEXT: %truncval = trunc i32 %loaded to i16
+; CHECK-NEXT: %mergeres = zext i16 %newval to i32
+; CHECK-NEXT: %maskedloaded = and i32 %loaded, -65536
+; CHECK-NEXT: %finalres = or i32 %mergeres, %maskedloaded
+; CHECK-NEXT: %zext = zext i16 %oldval to i32
+; CHECK-NEXT: %expected = or i32 %maskedloaded, %zext
+; CHECK-NEXT: %oldval1 = cmpxchg i32* %ptr32, i32 %expected, i32 %finalres seq_cst
+; CHECK-NEXT: %success = icmp eq i32 %oldval1, %loaded
+; CHECK-NEXT: br i1 %success, label %atomic16successor, label %atomic16aligned32
+;
+; CHECK: atomic16aligned16:
+; CHECK-NEXT: %loaded2 = load atomic i32* %ptr32 seq_cst, align 4
+; CHECK-NEXT: %lshr = lshr i32 %loaded2, 16
+; CHECK-NEXT: %shval = trunc i32 %lshr to i16
+; CHECK-NEXT: %zext3 = zext i16 %newval to i32
+; CHECK-NEXT: %mergeres4 = shl i32 %zext3, 16
+; CHECK-NEXT: %maskedloaded5 = and i32 %loaded2, 65535
+; CHECK-NEXT: %finalres6 = or i32 %mergeres4, %maskedloaded5
+; CHECK-NEXT: %zext7 = zext i16 %oldval to i32
+; CHECK-NEXT: %shl = shl i32 %zext7, 16
+; CHECK-NEXT: %expected8 = or i32 %maskedloaded5, %shl
+; CHECK-NEXT: %oldval9 = cmpxchg i32* %ptr32, i32 %expected8, i32 %finalres6 seq_cst
+; CHECK-NEXT: %success10 = icmp eq i32 %oldval9, %loaded2
+; CHECK-NEXT: br i1 %success10, label %atomic16successor, label %atomic16aligned16
+  %1 = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %oldval, i16 %newval, i32 6, i32 6)
+  ret i16 %1
+}
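
For reference, the shape of the code that atomic16BitX8632Hack emits corresponds roughly to the following C++11 sketch of a 16-bit fetch-and-add emulated with a compare-exchange loop on the containing 32-bit word. This is illustrative only and not part of the patch: the helper name emulated_fetch_add_16 is made up, and reinterpreting storage as std::atomic<uint32_t> is a shortcut a real library could not take; the pass avoids the issue by emitting IR directly.

#include <atomic>
#include <cstdint>

// Hypothetical helper, for illustration only: emulate a seq_cst 16-bit
// fetch-and-add using a cmpxchg loop on the naturally aligned 32-bit word
// that contains the 16-bit value.
static uint16_t emulated_fetch_add_16(uint16_t *ptr16, uint16_t rhs) {
  uintptr_t addr = reinterpret_cast<uintptr_t>(ptr16);
  // Align the pointer down to 32 bits and decide whether the 16-bit value
  // sits in the bottom or top half of the word (the aligned32 test above).
  auto *word =
      reinterpret_cast<std::atomic<uint32_t> *>(addr & ~uintptr_t(3));
  unsigned shift = (addr & 3) ? 16 : 0;
  uint32_t mask = uint32_t(0xFFFF) << shift;

  uint32_t loaded = word->load(std::memory_order_seq_cst);
  for (;;) {
    uint16_t old16 = static_cast<uint16_t>(loaded >> shift);
    uint16_t new16 = static_cast<uint16_t>(old16 + rhs);
    // Merge the 16-bit result back into the untouched half of the word.
    uint32_t desired = (loaded & ~mask) | (uint32_t(new16) << shift);
    // Succeeds only if the whole 32-bit word is still what was loaded; on
    // failure 'loaded' is refreshed and the loop retries, mirroring the
    // branch back to %atomic16aligned32 / %atomic16aligned16.
    if (word->compare_exchange_strong(loaded, desired,
                                      std::memory_order_seq_cst))
      return old16; // value before the operation, like the %1 phi node
  }
}

Returning the pre-operation value is what the expansion's PHI of %truncval / %shval in the atomic16successor block implements, matching the fetch-and-op semantics of the original intrinsic.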