Diffstat (limited to 'lib')
-rw-r--r--  lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp | 196
1 file changed, 196 insertions, 0 deletions
diff --git a/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
index fc5138574d..3550cd9aca 100644
--- a/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
+++ b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
@@ -19,12 +19,14 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/NaClAtomicIntrinsics.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
@@ -224,11 +226,27 @@ private:
                            thawMemoryOrder(Call->getArgOperand(2)), SS, Call);
       break;
     case Intrinsic::nacl_atomic_rmw:
+      if (needsX8632HackFor16BitAtomics(cast<PointerType>(
+              Call->getArgOperand(1)->getType())->getElementType())) {
+        // TODO(jfb) Remove this hack. See below.
+        atomic16BitX8632Hack(Call, false, Call->getArgOperand(1),
+                             Call->getArgOperand(2), Call->getArgOperand(0),
+                             NULL);
+        return true;
+      }
       I = new AtomicRMWInst(thawRMWOperation(Call->getArgOperand(0)),
                             Call->getArgOperand(1), Call->getArgOperand(2),
                             thawMemoryOrder(Call->getArgOperand(3)), SS, Call);
       break;
     case Intrinsic::nacl_atomic_cmpxchg:
+      if (needsX8632HackFor16BitAtomics(cast<PointerType>(
+              Call->getArgOperand(0)->getType())->getElementType())) {
+        // TODO(jfb) Remove this hack. See below.
+        atomic16BitX8632Hack(Call, true, Call->getArgOperand(0),
+                             Call->getArgOperand(2), NULL,
+                             Call->getArgOperand(1));
+        return true;
+      }
       // TODO LLVM currently doesn't support specifying separate memory
       //      orders for compare exchange's success and failure cases:
       //      LLVM IR implicitly drops the Release part of the specified
@@ -304,6 +322,184 @@ private:
     }
   }
 
+  // TODO(jfb) Remove the following hacks once NaCl's x86-32 validator
+  // supports 16-bit atomic intrinsics. See:
+  //   https://code.google.com/p/nativeclient/issues/detail?id=3579
+  //   https://code.google.com/p/nativeclient/issues/detail?id=2981
+  // ===========================================================================
+  bool needsX8632HackFor16BitAtomics(Type *OverloadedType) const {
+    return Triple(M->getTargetTriple()).getArch() == Triple::x86 &&
+           OverloadedType == Type::getInt16Ty(M->getContext());
+  }
+
+  /// Expand the 16-bit intrinsic into an equivalent 32-bit
+  /// compare-exchange loop.
+  void atomic16BitX8632Hack(IntrinsicInst *Call, bool IsCmpXChg,
+                            Value *Ptr16, Value *RHS, Value *RMWOp,
+                            Value *CmpXChgOldVal) const {
+    assert((IsCmpXChg ? CmpXChgOldVal : RMWOp) &&
+           "cmpxchg expects an old value, whereas RMW expects an operation");
+    Type *I16 = Type::getInt16Ty(M->getContext());
+    Type *I32 = Type::getInt32Ty(M->getContext());
+    Type *I32Ptr = Type::getInt32PtrTy(M->getContext());
+
+    // Precede this with a compiler fence.
+    FunctionType *FTy =
+        FunctionType::get(Type::getVoidTy(M->getContext()), false);
+    std::string AsmString; // Empty.
+    std::string Constraints("~{memory}");
+    bool HasSideEffect = true;
+    CallInst::Create(InlineAsm::get(
+        FTy, AsmString, Constraints, HasSideEffect), "", Call);
+
+    BasicBlock *CurrentBB = Call->getParent();
+    IRBuilder<> IRB(CurrentBB, Call);
+    BasicBlock *Aligned32BB =
+        BasicBlock::Create(IRB.getContext(), "atomic16aligned32",
+                           CurrentBB->getParent());
+    BasicBlock *Aligned16BB =
+        BasicBlock::Create(IRB.getContext(), "atomic16aligned16",
+                           CurrentBB->getParent());
+
+    // Setup.
+    // Align the 16-bit pointer to 32-bits, and figure out if the 16-bit
+    // operation is to be carried on the top or bottom half of the
+    // 32-bit aligned value.
+    Value *IPtr = IRB.CreatePtrToInt(Ptr16, I32, "uintptr");
+    Value *IPtrAlign = IRB.CreateAnd(IPtr, IRB.getInt32(~3u), "aligneduintptr");
+    Value *Aligned32 = IRB.CreateAnd(IPtr, IRB.getInt32(3u), "aligned32");
+    Value *Ptr32 = IRB.CreateIntToPtr(IPtrAlign, I32Ptr, "ptr32");
+    Value *IsAligned32 = IRB.CreateICmpEQ(Aligned32, IRB.getInt32(0),
+                                          "isaligned32");
+    IRB.CreateCondBr(IsAligned32, Aligned32BB, Aligned16BB);
+
+    // Create a diamond after the setup. The rest of the basic block
+    // that the Call was in is separated into the successor block.
+    BasicBlock *Successor =
+        CurrentBB->splitBasicBlock(IRB.GetInsertPoint(), "atomic16successor");
+    // Remove the extra unconditional branch that the split added.
+    CurrentBB->getTerminator()->eraseFromParent();
+
+    // Aligned 32 block.
+    // The 16-bit value was aligned to 32-bits:
+    //  - Atomically load the full 32-bit value.
+    //  - Get the 16-bit value from its bottom.
+    //  - Perform the 16-bit operation.
+    //  - Truncate and merge the result back with the top half of the
+    //    loaded value.
+    //  - Try to compare-exchange this new 32-bit result. This will
+    //    succeed if the value at the 32-bit location is still what was
+    //    just loaded. If not, try the entire thing again.
+    //  - Return the 16-bit value before the operation was performed.
+    Value *Ret32;
+    {
+      IRB.SetInsertPoint(Aligned32BB);
+      LoadInst *Loaded = IRB.CreateAlignedLoad(Ptr32, 4, "loaded");
+      Loaded->setAtomic(SequentiallyConsistent);
+      Value *TruncVal = IRB.CreateTrunc(Loaded, I16, "truncval");
+      Ret32 = TruncVal;
+      Value *Res;
+      if (IsCmpXChg) {
+        Res = RHS;
+      } else {
+        switch (thawRMWOperation(RMWOp)) {
+        default: llvm_unreachable("unknown atomic RMW operation");
+        case AtomicRMWInst::Add:
+          Res = IRB.CreateAdd(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Sub:
+          Res = IRB.CreateSub(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Or:
+          Res = IRB.CreateOr(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::And:
+          Res = IRB.CreateAnd(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Xor:
+          Res = IRB.CreateXor(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Xchg:
+          Res = RHS; break;
+        }
+      }
+      Value *MergeRes = IRB.CreateZExt(Res, I32, "mergeres");
+      Value *MaskedLoaded = IRB.CreateAnd(Loaded, IRB.getInt32(0xFFFF0000u),
+                                          "maskedloaded");
+      Value *FinalRes = IRB.CreateOr(MergeRes, MaskedLoaded, "finalres");
+      Value *Expected = IsCmpXChg ?
+          IRB.CreateOr(MaskedLoaded, IRB.CreateZExt(CmpXChgOldVal, I32, "zext"),
+                       "expected") :
+          Loaded;
+      Value *OldVal = IRB.CreateAtomicCmpXchg(Ptr32, Expected, FinalRes,
+                                              SequentiallyConsistent);
+      OldVal->setName("oldval");
+      // Test that the entire 32-bit value didn't change during the operation.
+      Value *Success = IRB.CreateICmpEQ(OldVal, Loaded, "success");
+      IRB.CreateCondBr(Success, Successor, Aligned32BB);
+    }
+
+    // Aligned 16 block.
+    // Similar to the above aligned 32 block, but the 16-bit value is in
+    // the top half of the 32-bit value. It needs to be shifted down,
+    // and shifted back up before being merged in.
+    Value *Ret16;
+    {
+      IRB.SetInsertPoint(Aligned16BB);
+      LoadInst *Loaded = IRB.CreateAlignedLoad(Ptr32, 4, "loaded");
+      Loaded->setAtomic(SequentiallyConsistent);
+      Value *ShVal = IRB.CreateTrunc(IRB.CreateLShr(Loaded, 16, "lshr"), I16,
+                                     "shval");
+      Ret16 = ShVal;
+      Value *Res;
+      if (IsCmpXChg) {
+        Res = RHS;
+      } else {
+        switch (thawRMWOperation(RMWOp)) {
+        default: llvm_unreachable("unknown atomic RMW operation");
+        case AtomicRMWInst::Add:
+          Res = IRB.CreateAdd(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Sub:
+          Res = IRB.CreateSub(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Or:
+          Res = IRB.CreateOr(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::And:
+          Res = IRB.CreateAnd(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Xor:
+          Res = IRB.CreateXor(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Xchg:
+          Res = RHS; break;
+        }
+      }
+      Value *MergeRes = IRB.CreateShl(IRB.CreateZExt(Res, I32, "zext"), 16,
+                                      "mergeres");
+      Value *MaskedLoaded = IRB.CreateAnd(Loaded, IRB.getInt32(0xFFFF),
+                                          "maskedloaded");
+      Value *FinalRes = IRB.CreateOr(MergeRes, MaskedLoaded, "finalres");
+      Value *Expected = IsCmpXChg ?
+          IRB.CreateOr(MaskedLoaded, IRB.CreateShl(
+              IRB.CreateZExt(CmpXChgOldVal, I32, "zext"), 16, "shl"),
+                       "expected") :
+          Loaded;
+      Value *OldVal = IRB.CreateAtomicCmpXchg(Ptr32, Expected, FinalRes,
+                                              SequentiallyConsistent);
+      OldVal->setName("oldval");
+      // Test that the entire 32-bit value didn't change during the operation.
+      Value *Success = IRB.CreateICmpEQ(OldVal, Loaded, "success");
+      IRB.CreateCondBr(Success, Successor, Aligned16BB);
+    }
+
+    // Merge the value, and remove the original intrinsic Call.
+    IRB.SetInsertPoint(Successor->getFirstInsertionPt());
+    PHINode *PHI = IRB.CreatePHI(I16, 2);
+    PHI->addIncoming(Ret32, Aligned32BB);
+    PHI->addIncoming(Ret16, Aligned16BB);
+    Call->replaceAllUsesWith(PHI);
+    Call->eraseFromParent();
+
+    // Finish everything with another compiler fence.
+    CallInst::Create(InlineAsm::get(
+        FTy, AsmString, Constraints, HasSideEffect), "",
+        Successor->getFirstInsertionPt());
+  }
+  // ===========================================================================
+  // End hacks.
+
   AtomicCallResolver(const AtomicCallResolver &);
   AtomicCallResolver &operator=(const AtomicCallResolver &);
 };
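
For reference, the expansion this patch emits behaves like the following standalone C++11 sketch (illustrative only, not part of the patch): a 16-bit atomic read-modify-write is emulated with a sequentially consistent 32-bit compare-exchange loop on the containing 4-byte word, operating on the low or high half depending on the pointer's alignment. The function name, the choice of fetch-add as the RMW operation, and the cast to std::atomic<uint32_t> are assumptions made for the sketch.

#include <atomic>
#include <cstdint>

// Sketch: emulate a 16-bit atomic fetch-add via a 32-bit CAS loop,
// mirroring the strategy of atomic16BitX8632Hack. Assumes the 16-bit
// value never straddles a 4-byte boundary (its natural 2-byte
// alignment guarantees this) and a little-endian target such as x86.
uint16_t fetch_add_16_via_32(uint16_t *Ptr16, uint16_t RHS) {
  uintptr_t IPtr = reinterpret_cast<uintptr_t>(Ptr16);
  // Round the address down to a 32-bit boundary, like "aligneduintptr".
  std::atomic<uint32_t> *Ptr32 =
      reinterpret_cast<std::atomic<uint32_t> *>(IPtr & ~uintptr_t(3));
  // Low half when the address is 4-byte aligned, high half otherwise.
  unsigned Shift = (IPtr & 3) ? 16 : 0;
  uint32_t Loaded = Ptr32->load(std::memory_order_seq_cst);
  for (;;) {
    uint16_t Old = uint16_t(Loaded >> Shift);      // Value before the op.
    uint16_t New = uint16_t(Old + RHS);            // The 16-bit operation.
    // Splice the 16-bit result back into its half of the 32-bit word,
    // keeping the other half exactly as loaded ("maskedloaded").
    uint32_t Desired = (Loaded & ~(uint32_t(0xFFFF) << Shift)) |
                       (uint32_t(New) << Shift);
    // On failure compare_exchange refreshes Loaded and the loop retries,
    // just like the branch back to the load in the emitted IR.
    if (Ptr32->compare_exchange_strong(Loaded, Desired,
                                       std::memory_order_seq_cst))
      return Old; // RMW intrinsics return the pre-operation value.
  }
}

The nacl_atomic_cmpxchg case in the patch follows the same shape, except that Desired is built from the caller's new value and Expected from the caller's old value merged with the untouched half, so the CAS simultaneously checks the 16-bit comparison and that the other half did not change.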