Diffstat (limited to 'lib')
-rw-r--r--  lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp | 196
1 file changed, 196 insertions, 0 deletions
diff --git a/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
index fc5138574d..3550cd9aca 100644
--- a/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
+++ b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp
@@ -19,12 +19,14 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/NaClAtomicIntrinsics.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
@@ -224,11 +226,27 @@ private:
                            thawMemoryOrder(Call->getArgOperand(2)), SS, Call);
       break;
     case Intrinsic::nacl_atomic_rmw:
+      if (needsX8632HackFor16BitAtomics(cast<PointerType>(
+              Call->getArgOperand(1)->getType())->getElementType())) {
+        // TODO(jfb) Remove this hack. See below.
+        atomic16BitX8632Hack(Call, false, Call->getArgOperand(1),
+                             Call->getArgOperand(2), Call->getArgOperand(0),
+                             NULL);
+        return true;
+      }
       I = new AtomicRMWInst(thawRMWOperation(Call->getArgOperand(0)),
                             Call->getArgOperand(1), Call->getArgOperand(2),
                             thawMemoryOrder(Call->getArgOperand(3)), SS, Call);
       break;
     case Intrinsic::nacl_atomic_cmpxchg:
+      if (needsX8632HackFor16BitAtomics(cast<PointerType>(
+              Call->getArgOperand(0)->getType())->getElementType())) {
+        // TODO(jfb) Remove this hack. See below.
+        atomic16BitX8632Hack(Call, true, Call->getArgOperand(0),
+                             Call->getArgOperand(2), NULL,
+                             Call->getArgOperand(1));
+        return true;
+      }
       // TODO LLVM currently doesn't support specifying separate memory
       //      orders for compare exchange's success and failure cases:
       //      LLVM IR implicitly drops the Release part of the specified
@@ -304,6 +322,184 @@ private:
     }
   }
 
+  // TODO(jfb) Remove the following hacks once NaCl's x86-32 validator
+  // supports 16-bit atomic intrinsics. See:
+  //   https://code.google.com/p/nativeclient/issues/detail?id=3579
+  //   https://code.google.com/p/nativeclient/issues/detail?id=2981
+  // ===========================================================================
+  bool needsX8632HackFor16BitAtomics(Type *OverloadedType) const {
+    return Triple(M->getTargetTriple()).getArch() == Triple::x86 &&
+           OverloadedType == Type::getInt16Ty(M->getContext());
+  }
+
+  /// Expand the 16-bit intrinsic into an equivalent 32-bit
+  /// compare-exchange loop.
+  void atomic16BitX8632Hack(IntrinsicInst *Call, bool IsCmpXChg,
+                            Value *Ptr16, Value *RHS, Value *RMWOp,
+                            Value *CmpXChgOldVal) const {
+    assert((IsCmpXChg ? CmpXChgOldVal : RMWOp) &&
+           "cmpxchg expects an old value, whereas RMW expects an operation");
+    Type *I16 = Type::getInt16Ty(M->getContext());
+    Type *I32 = Type::getInt32Ty(M->getContext());
+    Type *I32Ptr = Type::getInt32PtrTy(M->getContext());
+
+    // Precede this with a compiler fence.
+    FunctionType *FTy =
+        FunctionType::get(Type::getVoidTy(M->getContext()), false);
+    std::string AsmString; // Empty.
+    std::string Constraints("~{memory}");
+    bool HasSideEffect = true;
+    CallInst::Create(InlineAsm::get(
+        FTy, AsmString, Constraints, HasSideEffect), "", Call);
+
+    BasicBlock *CurrentBB = Call->getParent();
+    IRBuilder<> IRB(CurrentBB, Call);
+    BasicBlock *Aligned32BB =
+        BasicBlock::Create(IRB.getContext(), "atomic16aligned32",
+                           CurrentBB->getParent());
+    BasicBlock *Aligned16BB =
+        BasicBlock::Create(IRB.getContext(), "atomic16aligned16",
+                           CurrentBB->getParent());
+
+    // Setup.
+    // Align the 16-bit pointer to 32-bits, and figure out if the 16-bit
+    // operation is to be carried on the top or bottom half of the
+    // 32-bit aligned value.
+    Value *IPtr = IRB.CreatePtrToInt(Ptr16, I32, "uintptr");
+    Value *IPtrAlign = IRB.CreateAnd(IPtr, IRB.getInt32(~3u), "aligneduintptr");
+    Value *Aligned32 = IRB.CreateAnd(IPtr, IRB.getInt32(3u), "aligned32");
+    Value *Ptr32 = IRB.CreateIntToPtr(IPtrAlign, I32Ptr, "ptr32");
+    Value *IsAligned32 = IRB.CreateICmpEQ(Aligned32, IRB.getInt32(0),
+                                          "isaligned32");
+    IRB.CreateCondBr(IsAligned32, Aligned32BB, Aligned16BB);
+
+    // Create a diamond after the setup. The rest of the basic block
+    // that the Call was in is separated into the successor block.
+    BasicBlock *Successor =
+        CurrentBB->splitBasicBlock(IRB.GetInsertPoint(), "atomic16successor");
+    // Remove the extra unconditional branch that the split added.
+    CurrentBB->getTerminator()->eraseFromParent();
+
+    // Aligned 32 block.
+    // The 16-bit value was aligned to 32-bits:
+    //  - Atomically load the full 32-bit value.
+    //  - Get the 16-bit value from its bottom.
+    //  - Perform the 16-bit operation.
+    //  - Truncate and merge the result back with the top half of the
+    //    loaded value.
+    //  - Try to compare-exchange this new 32-bit result. This will
+    //    succeed if the value at the 32-bit location is still what was
+    //    just loaded. If not, try the entire thing again.
+    //  - Return the 16-bit value before the operation was performed.
+    Value *Ret32;
+    {
+      IRB.SetInsertPoint(Aligned32BB);
+      LoadInst *Loaded = IRB.CreateAlignedLoad(Ptr32, 4, "loaded");
+      Loaded->setAtomic(SequentiallyConsistent);
+      Value *TruncVal = IRB.CreateTrunc(Loaded, I16, "truncval");
+      Ret32 = TruncVal;
+      Value *Res;
+      if (IsCmpXChg) {
+        Res = RHS;
+      } else {
+        switch (thawRMWOperation(RMWOp)) {
+        default: llvm_unreachable("unknown atomic RMW operation");
+        case AtomicRMWInst::Add:
+          Res = IRB.CreateAdd(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Sub:
+          Res = IRB.CreateSub(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Or:
+          Res = IRB.CreateOr(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::And:
+          Res = IRB.CreateAnd(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Xor:
+          Res = IRB.CreateXor(TruncVal, RHS, "res"); break;
+        case AtomicRMWInst::Xchg:
+          Res = RHS; break;
+        }
+      }
+      Value *MergeRes = IRB.CreateZExt(Res, I32, "mergeres");
+      Value *MaskedLoaded = IRB.CreateAnd(Loaded, IRB.getInt32(0xFFFF0000u),
+                                          "maskedloaded");
+      Value *FinalRes = IRB.CreateOr(MergeRes, MaskedLoaded, "finalres");
+      Value *Expected = IsCmpXChg ?
+          IRB.CreateOr(MaskedLoaded, IRB.CreateZExt(CmpXChgOldVal, I32, "zext"),
+                       "expected") :
+          Loaded;
+      Value *OldVal = IRB.CreateAtomicCmpXchg(Ptr32, Expected, FinalRes,
+                                              SequentiallyConsistent);
+      OldVal->setName("oldval");
+      // Test that the entire 32-bit value didn't change during the operation.
+      Value *Success = IRB.CreateICmpEQ(OldVal, Loaded, "success");
+      IRB.CreateCondBr(Success, Successor, Aligned32BB);
+    }
+
+    // Aligned 16 block.
+    // Similar to the above aligned 32 block, but the 16-bit value is in
+    // the top half of the 32-bit value. It needs to be shifted down,
+    // and shifted back up before being merged in.
+    Value *Ret16;
+    {
+      IRB.SetInsertPoint(Aligned16BB);
+      LoadInst *Loaded = IRB.CreateAlignedLoad(Ptr32, 4, "loaded");
+      Loaded->setAtomic(SequentiallyConsistent);
+      Value *ShVal = IRB.CreateTrunc(IRB.CreateLShr(Loaded, 16, "lshr"), I16,
+                                     "shval");
+      Ret16 = ShVal;
+      Value *Res;
+      if (IsCmpXChg) {
+        Res = RHS;
+      } else {
+        switch (thawRMWOperation(RMWOp)) {
+        default: llvm_unreachable("unknown atomic RMW operation");
+        case AtomicRMWInst::Add:
+          Res = IRB.CreateAdd(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Sub:
+          Res = IRB.CreateSub(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Or:
+          Res = IRB.CreateOr(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::And:
+          Res = IRB.CreateAnd(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Xor:
+          Res = IRB.CreateXor(ShVal, RHS, "res"); break;
+        case AtomicRMWInst::Xchg:
+          Res = RHS; break;
+        }
+      }
+      Value *MergeRes = IRB.CreateShl(IRB.CreateZExt(Res, I32, "zext"), 16,
+                                      "mergeres");
+      Value *MaskedLoaded = IRB.CreateAnd(Loaded, IRB.getInt32(0xFFFF),
+                                          "maskedloaded");
+      Value *FinalRes = IRB.CreateOr(MergeRes, MaskedLoaded, "finalres");
+      Value *Expected = IsCmpXChg ?
+          IRB.CreateOr(MaskedLoaded, IRB.CreateShl(
+              IRB.CreateZExt(CmpXChgOldVal, I32, "zext"), 16, "shl"),
+                       "expected") :
+          Loaded;
+      Value *OldVal = IRB.CreateAtomicCmpXchg(Ptr32, Expected, FinalRes,
+                                              SequentiallyConsistent);
+      OldVal->setName("oldval");
+      // Test that the entire 32-bit value didn't change during the operation.
+      Value *Success = IRB.CreateICmpEQ(OldVal, Loaded, "success");
+      IRB.CreateCondBr(Success, Successor, Aligned16BB);
+    }
+
+    // Merge the value, and remove the original intrinsic Call.
+    IRB.SetInsertPoint(Successor->getFirstInsertionPt());
+    PHINode *PHI = IRB.CreatePHI(I16, 2);
+    PHI->addIncoming(Ret32, Aligned32BB);
+    PHI->addIncoming(Ret16, Aligned16BB);
+    Call->replaceAllUsesWith(PHI);
+    Call->eraseFromParent();
+
+    // Finish everything with another compiler fence.
+    CallInst::Create(InlineAsm::get(
+        FTy, AsmString, Constraints, HasSideEffect), "",
+        Successor->getFirstInsertionPt());
+  }
+  // ===========================================================================
+  // End hacks.
+
   AtomicCallResolver(const AtomicCallResolver &);
   AtomicCallResolver &operator=(const AtomicCallResolver &);
 };
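
For reference, the expansion this patch emits behaves like the following standalone C++11 sketch (illustrative only, not part of the patch): a 16-bit atomic read-modify-write is emulated with a sequentially consistent 32-bit compare-exchange loop on the containing 4-byte word, operating on the low or high half depending on the pointer's alignment. The function name, the choice of fetch-add as the RMW operation, and the cast to std::atomic<uint32_t> are assumptions made for the sketch.

#include <atomic>
#include <cstdint>

// Sketch: emulate a 16-bit atomic fetch-add via a 32-bit CAS loop,
// mirroring the strategy of atomic16BitX8632Hack. Assumes the 16-bit
// value never straddles a 4-byte boundary (its natural 2-byte
// alignment guarantees this) and a little-endian target such as x86.
uint16_t fetch_add_16_via_32(uint16_t *Ptr16, uint16_t RHS) {
  uintptr_t IPtr = reinterpret_cast<uintptr_t>(Ptr16);
  // Round the address down to a 32-bit boundary, like "aligneduintptr".
  std::atomic<uint32_t> *Ptr32 =
      reinterpret_cast<std::atomic<uint32_t> *>(IPtr & ~uintptr_t(3));
  // Low half when the address is 4-byte aligned, high half otherwise.
  unsigned Shift = (IPtr & 3) ? 16 : 0;
  uint32_t Loaded = Ptr32->load(std::memory_order_seq_cst);
  for (;;) {
    uint16_t Old = uint16_t(Loaded >> Shift);      // Value before the op.
    uint16_t New = uint16_t(Old + RHS);            // The 16-bit operation.
    // Splice the 16-bit result back into its half of the 32-bit word,
    // keeping the other half exactly as loaded ("maskedloaded").
    uint32_t Desired = (Loaded & ~(uint32_t(0xFFFF) << Shift)) |
                       (uint32_t(New) << Shift);
    // On failure compare_exchange refreshes Loaded and the loop retries,
    // just like the branch back to the load in the emitted IR.
    if (Ptr32->compare_exchange_strong(Loaded, Desired,
                                       std::memory_order_seq_cst))
      return Old; // RMW intrinsics return the pre-operation value.
  }
}

The nacl_atomic_cmpxchg case in the patch follows the same shape, except that Desired is built from the caller's new value and Expected from the caller's old value merged with the untouched half, so the CAS simultaneously checks the 16-bit comparison and that the other half did not change.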