author    David Chisnall <csdavec@swan.ac.uk>    2013-03-03 16:02:42 +0000
committer David Chisnall <csdavec@swan.ac.uk>    2013-03-03 16:02:42 +0000
commit    72c1dba494b02960284bc6618c1b640c950c3785 (patch)
tree      7871ecd146763704e3207ccd2abcae724f0d7bfe
parent    5b9f5cc7c4310aec110f315df6fc6d6366b24b08 (diff)
Improve C11 atomics support:

- Generate atomicrmw operations in most of the cases when it's sensible to
  do so.
- Don't crash in several common cases (and hopefully don't crash in more of
  them).
- Add some better tests.

We now generate significantly better code for things like:

  _Atomic(int) x;
  ...
  x++;

On MIPS, this now generates a 4-instruction ll/sc loop, where previously it
generated about 30 instructions in two nested loops. On x86-64, we generate
a single lock incl, instead of a lock cmpxchgl loop (one instruction instead
of ten).

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@176420 91177308-0d34-0410-b5e6-96231b3b80d8
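
As an illustration of the improvement described above (editorial sketch, not
part of the commit; the function and variable names are made up), the affected
pattern and its rough lowering look like this:

    _Atomic(int) counter;

    void bump(void)
    {
      counter++;  /* before this commit: a compare-and-exchange retry loop
                     (a lock cmpxchgl loop on x86-64, roughly ten instructions);
                     after it: a single atomicrmw add, which becomes one
                     lock incl on x86-64 or a 4-instruction ll/sc loop on MIPS */
    }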
-rw-r--r--   lib/CodeGen/CGExprScalar.cpp |  96
-rw-r--r--   test/CodeGen/atomic_ops.c    |   5
-rw-r--r--   test/CodeGen/c11atomics.c    | 139
3 files changed, 228 insertions, 12 deletions
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 1bfd4146de..7df4818e5d 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -1444,21 +1444,60 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre) {
QualType type = E->getSubExpr()->getType();
- llvm::Value *value = EmitLoadOfLValue(LV);
- llvm::Value *input = value;
llvm::PHINode *atomicPHI = 0;
+ llvm::Value *value;
+ llvm::Value *input;
int amount = (isInc ? 1 : -1);
if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
+ type = atomicTy->getValueType();
+ if (isInc && type->isBooleanType()) {
+ llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type);
+ if (isPre) {
+ Builder.Insert(new llvm::StoreInst(True,
+ LV.getAddress(), LV.isVolatileQualified(),
+ LV.getAlignment().getQuantity(),
+ llvm::SequentiallyConsistent));
+ return Builder.getTrue();
+ }
+ // For atomic bool increment, we just store true and return it for
+ // preincrement, do an atomic swap with true for postincrement
+ return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
+ LV.getAddress(), True, llvm::SequentiallyConsistent);
+ }
+ // Special case for atomic increment / decrement on integers, emit
+ // atomicrmw instructions. We skip this if we want to be doing overflow
+ // checking, and fall into the slow path with the atomic cmpxchg loop.
+ if (!type->isBooleanType() && type->isIntegerType() &&
+ !(type->isUnsignedIntegerType() &&
+ CGF.SanOpts->UnsignedIntegerOverflow) &&
+ CGF.getLangOpts().getSignedOverflowBehavior() !=
+ LangOptions::SOB_Trapping) {
+ llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add :
+ llvm::AtomicRMWInst::Sub;
+ llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add :
+ llvm::Instruction::Sub;
+ llvm::Value *amt = CGF.EmitToMemory(
+ llvm::ConstantInt::get(ConvertType(type), 1, true), type);
+ llvm::Value *old = Builder.CreateAtomicRMW(aop,
+ LV.getAddress(), amt, llvm::SequentiallyConsistent);
+ return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+ }
+ value = EmitLoadOfLValue(LV);
+ input = value;
+ // For every other atomic operation, we need to emit a load-op-cmpxchg loop
llvm::BasicBlock *startBB = Builder.GetInsertBlock();
llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
+ value = CGF.EmitToMemory(value, type);
Builder.CreateBr(opBB);
Builder.SetInsertPoint(opBB);
atomicPHI = Builder.CreatePHI(value->getType(), 2);
atomicPHI->addIncoming(value, startBB);
- type = atomicTy->getValueType();
value = atomicPHI;
+ } else {
+ value = EmitLoadOfLValue(LV);
+ input = value;
}
// Special case of integer increment that we have to check first: bool++.
@@ -1596,7 +1635,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI,
- value, llvm::SequentiallyConsistent);
+ CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent);
atomicPHI->addIncoming(old, opBB);
llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
Builder.CreateCondBr(success, contBB, opBB);
@@ -1872,20 +1911,63 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
OpInfo.E = E;
// Load/convert the LHS.
LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
- OpInfo.LHS = EmitLoadOfLValue(LHSLV);
llvm::PHINode *atomicPHI = 0;
- if (LHSTy->isAtomicType()) {
+ if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
+ QualType type = atomicTy->getValueType();
+ if (!type->isBooleanType() && type->isIntegerType() &&
+ !(type->isUnsignedIntegerType() &&
+ CGF.SanOpts->UnsignedIntegerOverflow) &&
+ CGF.getLangOpts().getSignedOverflowBehavior() !=
+ LangOptions::SOB_Trapping) {
+ llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP;
+ switch (OpInfo.Opcode) {
+ // We don't have atomicrmw operands for *, %, /, <<, >>
+ case BO_MulAssign: case BO_DivAssign:
+ case BO_RemAssign:
+ case BO_ShlAssign:
+ case BO_ShrAssign:
+ break;
+ case BO_AddAssign:
+ aop = llvm::AtomicRMWInst::Add;
+ break;
+ case BO_SubAssign:
+ aop = llvm::AtomicRMWInst::Sub;
+ break;
+ case BO_AndAssign:
+ aop = llvm::AtomicRMWInst::And;
+ break;
+ case BO_XorAssign:
+ aop = llvm::AtomicRMWInst::Xor;
+ break;
+ case BO_OrAssign:
+ aop = llvm::AtomicRMWInst::Or;
+ break;
+ default:
+ llvm_unreachable("Invalid compound assignment type");
+ }
+ if (aop != llvm::AtomicRMWInst::BAD_BINOP) {
+ llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS,
+ E->getRHS()->getType(), LHSTy), LHSTy);
+ Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt,
+ llvm::SequentiallyConsistent);
+ return LHSLV;
+ }
+ }
// FIXME: For floating point types, we should be saving and restoring the
// floating point environment in the loop.
llvm::BasicBlock *startBB = Builder.GetInsertBlock();
llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
+ OpInfo.LHS = EmitLoadOfLValue(LHSLV);
+ OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type);
Builder.CreateBr(opBB);
Builder.SetInsertPoint(opBB);
atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2);
atomicPHI->addIncoming(OpInfo.LHS, startBB);
OpInfo.LHS = atomicPHI;
}
+ else
+ OpInfo.LHS = EmitLoadOfLValue(LHSLV);
OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
E->getComputationLHSType());
@@ -1900,7 +1982,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI,
- Result, llvm::SequentiallyConsistent);
+ CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent);
atomicPHI->addIncoming(old, opBB);
llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
Builder.CreateCondBr(success, contBB, opBB);
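
As a concrete sketch (editorial, not part of the patch) of what the new
EmitCompoundAssignLValue path above does, assuming a plain _Atomic(int) and no
overflow sanitizers: operators with a matching atomicrmw binop now take the
single-instruction path, while the remaining operators keep the load + cmpxchg
loop.

    _Atomic(int) counter;

    void update(int n)
    {
      counter += n;  /* BO_AddAssign -> atomicrmw add ... seq_cst (single RMW) */
      counter ^= n;  /* BO_XorAssign -> atomicrmw xor ... seq_cst (single RMW) */
      counter *= n;  /* BO_MulAssign has no atomicrmw equivalent, so it still
                        goes through the atomic_op/atomic_cont cmpxchg loop    */
    }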
diff --git a/test/CodeGen/atomic_ops.c b/test/CodeGen/atomic_ops.c
index 481d1e06fb..910e9b9505 100644
--- a/test/CodeGen/atomic_ops.c
+++ b/test/CodeGen/atomic_ops.c
@@ -15,9 +15,4 @@ void foo(int x)
// CHECK: sdiv i32
// CHECK: cmpxchg i16*
- // These should be emitting atomicrmw instructions, but they aren't yet
- i += 2; // CHECK: cmpxchg
- i -= 2; // CHECK: cmpxchg
- i++; // CHECK: cmpxchg
- i--; // CHECK: cmpxchg
}
diff --git a/test/CodeGen/c11atomics.c b/test/CodeGen/c11atomics.c
new file mode 100644
index 0000000000..fd5d3dea98
--- /dev/null
+++ b/test/CodeGen/c11atomics.c
@@ -0,0 +1,139 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 | FileCheck %s
+
+// Test that we are generating atomicrmw instructions, rather than
+// compare-exchange loops for common atomic ops. This makes a big difference
+// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for
+// the load and then another ll/sc in the loop, expanding to about 30
+// instructions when it should be only 4. It has a smaller, but still
+// noticeable, impact on platforms like x86, which have dedicated atomic RMW
+// instructions.
+//
+// We currently emit cmpxchg loops for most operations on _Bools, because
+// they're sufficiently rare that it's not worth making sure that the semantics
+// are correct.
+
+typedef int __attribute__((vector_size(16))) vector;
+
+_Atomic(_Bool) b;
+_Atomic(int) i;
+_Atomic(long long) l;
+_Atomic(short) s;
+_Atomic(char*) p;
+_Atomic(float) f;
+_Atomic(vector) v;
+
+// CHECK-NOT: cmpxchg
+
+// CHECK: testinc
+void testinc(void)
+{
+ // Special case for suffix bool++, sets to true and returns the old value.
+ // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
+ b++;
+ // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
+ i++;
+ // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
+ l++;
+ // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
+ s++;
+ // Prefix increment
+ // Special case for bool: set to true and return true
+ // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
+ ++b;
+ // Currently, we have no variant of atomicrmw that returns the new value, so
+ // we have to generate an atomic add, which returns the old value, and then a
+ // non-atomic add.
+ // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
+ // CHECK: add i32
+ ++i;
+ // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
+ // CHECK: add i64
+ ++l;
+ // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
+ // CHECK: add i16
+ ++s;
+}
+// CHECK: testdec
+void testdec(void)
+{
+ // CHECK: cmpxchg i8* @b
+ b--;
+ // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
+ i--;
+ // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
+ l--;
+ // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
+ s--;
+ // CHECK: cmpxchg i8* @b
+ --b;
+ // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
+ // CHECK: sub i32
+ --i;
+ // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
+ // CHECK: sub i64
+ --l;
+ // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
+ // CHECK: sub i16
+ --s;
+}
+// CHECK: testaddeq
+void testaddeq(void)
+{
+ // CHECK: cmpxchg i8* @b
+ // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
+ // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
+ // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
+ b += 42;
+ i += 42;
+ l += 42;
+ s += 42;
+}
+// CHECK: testsubeq
+void testsubeq(void)
+{
+ // CHECK: cmpxchg i8* @b
+ // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
+ // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
+ // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
+ b -= 42;
+ i -= 42;
+ l -= 42;
+ s -= 42;
+}
+// CHECK: testxoreq
+void testxoreq(void)
+{
+ // CHECK: cmpxchg i8* @b
+ // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
+ // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
+ // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
+ b ^= 42;
+ i ^= 42;
+ l ^= 42;
+ s ^= 42;
+}
+// CHECK: testoreq
+void testoreq(void)
+{
+ // CHECK: cmpxchg i8* @b
+ // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
+ // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
+ // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
+ b |= 42;
+ i |= 42;
+ l |= 42;
+ s |= 42;
+}
+// CHECK: testandeq
+void testandeq(void)
+{
+ // CHECK: cmpxchg i8* @b
+ // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
+ // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
+ // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
+ b &= 42;
+ i &= 42;
+ l &= 42;
+ s &= 42;
+}
+
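
For reference (editorial note, not part of the new test), the _Bool cases above
do not follow the plain integer pattern; the shapes the CHECK lines expect boil
down to:

    _Atomic(_Bool) flag;

    void flip(void)
    {
      flag++;  /* atomicrmw xchg i8* @flag, i8 1 seq_cst -- store true, return old value */
      ++flag;  /* store atomic i8 1, i8* @flag seq_cst   -- result is simply true        */
      flag--;  /* no special case: decrement of an atomic _Bool keeps the cmpxchg loop   */
    }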