author    David Chisnall <csdavec@swan.ac.uk>    2013-03-03 16:02:42 +0000
committer David Chisnall <csdavec@swan.ac.uk>    2013-03-03 16:02:42 +0000
commit    72c1dba494b02960284bc6618c1b640c950c3785 (patch)
tree      7871ecd146763704e3207ccd2abcae724f0d7bfe
parent    5b9f5cc7c4310aec110f315df6fc6d6366b24b08 (diff)
Improve C11 atomics support:

- Generate atomicrmw operations in most of the cases when it's sensible to
  do so.
- Don't crash in several common cases (and hopefully don't crash in more of
  them).
- Add some better tests.

We now generate significantly better code for things like:

  _Atomic(int) x;
  ...
  x++;

On MIPS, this now generates a 4-instruction ll/sc loop, where previously it
generated about 30 instructions in two nested loops. On x86-64, we generate
a single lock incl, instead of a lock cmpxchgl loop (one instruction instead
of ten).

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@176420 91177308-0d34-0410-b5e6-96231b3b80d8
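
As an illustration of the improvement described above (editorial sketch, not
part of the commit; the function and variable names are made up), the affected
pattern and its rough lowering look like this:

    _Atomic(int) counter;

    void bump(void)
    {
      counter++;  /* before this commit: a compare-and-exchange retry loop
                     (a lock cmpxchgl loop on x86-64, roughly ten instructions);
                     after it: a single atomicrmw add, which becomes one
                     lock incl on x86-64 or a 4-instruction ll/sc loop on MIPS */
    }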
-rw-r--r--   lib/CodeGen/CGExprScalar.cpp |  96
-rw-r--r--   test/CodeGen/atomic_ops.c    |   5
-rw-r--r--   test/CodeGen/c11atomics.c    | 139
3 files changed, 228 insertions, 12 deletions
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 1bfd4146de..7df4818e5d 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -1444,21 +1444,60 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre) {
QualType type = E->getSubExpr()->getType();
- llvm::Value *value = EmitLoadOfLValue(LV);
- llvm::Value *input = value;
llvm::PHINode *atomicPHI = 0;
+ llvm::Value *value;
+ llvm::Value *input;
int amount = (isInc ? 1 : -1);
if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
+ type = atomicTy->getValueType();
+ if (isInc && type->isBooleanType()) {
+ llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type);
+ if (isPre) {
+ Builder.Insert(new llvm::StoreInst(True,
+ LV.getAddress(), LV.isVolatileQualified(),
+ LV.getAlignment().getQuantity(),
+ llvm::SequentiallyConsistent));
+ return Builder.getTrue();
+ }
+ // For atomic bool increment, we just store true and return it for
+ // preincrement, do an atomic swap with true for postincrement
+ return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
+ LV.getAddress(), True, llvm::SequentiallyConsistent);
+ }
+ // Special case for atomic increment / decrement on integers, emit
+ // atomicrmw instructions. We skip this if we want to be doing overflow
+ // checking, and fall into the slow path with the atomic cmpxchg loop.
+ if (!type->isBooleanType() && type->isIntegerType() &&
+ !(type->isUnsignedIntegerType() &&
+ CGF.SanOpts->UnsignedIntegerOverflow) &&
+ CGF.getLangOpts().getSignedOverflowBehavior() !=
+ LangOptions::SOB_Trapping) {
+ llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add :
+ llvm::AtomicRMWInst::Sub;
+ llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add :
+ llvm::Instruction::Sub;
+ llvm::Value *amt = CGF.EmitToMemory(
+ llvm::ConstantInt::get(ConvertType(type), 1, true), type);
+ llvm::Value *old = Builder.CreateAtomicRMW(aop,
+ LV.getAddress(), amt, llvm::SequentiallyConsistent);
+ return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+ }
+ value = EmitLoadOfLValue(LV);
+ input = value;
+ // For every other atomic operation, we need to emit a load-op-cmpxchg loop
llvm::BasicBlock *startBB = Builder.GetInsertBlock();
llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
+ value = CGF.EmitToMemory(value, type);
Builder.CreateBr(opBB);
Builder.SetInsertPoint(opBB);
atomicPHI = Builder.CreatePHI(value->getType(), 2);
atomicPHI->addIncoming(value, startBB);
- type = atomicTy->getValueType();
value = atomicPHI;
+ } else {
+ value = EmitLoadOfLValue(LV);
+ input = value;
}
// Special case of integer increment that we have to check first: bool++.
@@ -1596,7 +1635,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI,
- value, llvm::SequentiallyConsistent);
+ CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent);
atomicPHI->addIncoming(old, opBB);
llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
Builder.CreateCondBr(success, contBB, opBB);
@@ -1872,20 +1911,63 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
OpInfo.E = E;
// Load/convert the LHS.
LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
- OpInfo.LHS = EmitLoadOfLValue(LHSLV);
llvm::PHINode *atomicPHI = 0;
- if (LHSTy->isAtomicType()) {
+ if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
+ QualType type = atomicTy->getValueType();
+ if (!type->isBooleanType() && type->isIntegerType() &&
+ !(type->isUnsignedIntegerType() &&
+ CGF.SanOpts->UnsignedIntegerOverflow) &&
+ CGF.getLangOpts().getSignedOverflowBehavior() !=
+ LangOptions::SOB_Trapping) {
+ llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP;
+ switch (OpInfo.Opcode) {
+ // We don't have atomicrmw operands for *, %, /, <<, >>
+ case BO_MulAssign: case BO_DivAssign:
+ case BO_RemAssign:
+ case BO_ShlAssign:
+ case BO_ShrAssign:
+ break;
+ case BO_AddAssign:
+ aop = llvm::AtomicRMWInst::Add;
+ break;
+ case BO_SubAssign:
+ aop = llvm::AtomicRMWInst::Sub;
+ break;
+ case BO_AndAssign:
+ aop = llvm::AtomicRMWInst::And;
+ break;
+ case BO_XorAssign:
+ aop = llvm::AtomicRMWInst::Xor;
+ break;
+ case BO_OrAssign:
+ aop = llvm::AtomicRMWInst::Or;
+ break;
+ default:
+ llvm_unreachable("Invalid compound assignment type");
+ }
+ if (aop != llvm::AtomicRMWInst::BAD_BINOP) {
+ llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS,
+ E->getRHS()->getType(), LHSTy), LHSTy);
+ Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt,
+ llvm::SequentiallyConsistent);
+ return LHSLV;
+ }
+ }
// FIXME: For floating point types, we should be saving and restoring the
// floating point environment in the loop.
llvm::BasicBlock *startBB = Builder.GetInsertBlock();
llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
+ OpInfo.LHS = EmitLoadOfLValue(LHSLV);
+ OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type);
Builder.CreateBr(opBB);
Builder.SetInsertPoint(opBB);
atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2);
atomicPHI->addIncoming(OpInfo.LHS, startBB);
OpInfo.LHS = atomicPHI;
}
+ else
+ OpInfo.LHS = EmitLoadOfLValue(LHSLV);
OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
E->getComputationLHSType());
@@ -1900,7 +1982,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI,
- Result, llvm::SequentiallyConsistent);
+ CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent);
atomicPHI->addIncoming(old, opBB);
llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
Builder.CreateCondBr(success, contBB, opBB);
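
As a concrete sketch (editorial, not part of the patch) of what the new
EmitCompoundAssignLValue path above does, assuming a plain _Atomic(int) and no
overflow sanitizers: operators with a matching atomicrmw binop now take the
single-instruction path, while the remaining operators keep the load + cmpxchg
loop.

    _Atomic(int) counter;

    void update(int n)
    {
      counter += n;  /* BO_AddAssign -> atomicrmw add ... seq_cst (single RMW) */
      counter ^= n;  /* BO_XorAssign -> atomicrmw xor ... seq_cst (single RMW) */
      counter *= n;  /* BO_MulAssign has no atomicrmw equivalent, so it still
                        goes through the atomic_op/atomic_cont cmpxchg loop    */
    }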
diff --git a/test/CodeGen/atomic_ops.c b/test/CodeGen/atomic_ops.c
index 481d1e06fb..910e9b9505 100644
--- a/test/CodeGen/atomic_ops.c
+++ b/test/CodeGen/atomic_ops.c
@@ -15,9 +15,4 @@ void foo(int x)
// CHECK: sdiv i32
// CHECK: cmpxchg i16*
- // These should be emitting atomicrmw instructions, but they aren't yet
- i += 2; // CHECK: cmpxchg
- i -= 2; // CHECK: cmpxchg
- i++; // CHECK: cmpxchg
- i--; // CHECK: cmpxchg
}
diff --git a/test/CodeGen/c11atomics.c b/test/CodeGen/c11atomics.c
new file mode 100644
index 0000000000..fd5d3dea98
--- /dev/null
+++ b/test/CodeGen/c11atomics.c
@@ -0,0 +1,139 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 | FileCheck %s
+
+// Test that we are generating atomicrmw instructions, rather than
+// compare-exchange loops for common atomic ops. This makes a big difference
+// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for
+// the load and then another ll/sc in the loop, expanding to about 30
+// instructions when it should be only 4. It has a smaller, but still
+// noticeable, impact on platforms like x86, which have dedicated atomic RMW
+// instructions.
+//
+// We currently emit cmpxchg loops for most operations on _Bools, because
+// they're sufficiently rare that it's not worth making sure that the semantics
+// are correct.
+
+typedef int __attribute__((vector_size(16))) vector;
+
+_Atomic(_Bool) b;
+_Atomic(int) i;
+_Atomic(long long) l;
+_Atomic(short) s;
+_Atomic(char*) p;
+_Atomic(float) f;
+_Atomic(vector) v;
+
+// CHECK-NOT: cmpxchg
+
+// CHECK: testinc
+void testinc(void)
+{
+ // Special case for suffix bool++, sets to true and returns the old value.
+ // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
+ b++;
+ // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
+ i++;
+ // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
+ l++;
+ // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
+ s++;
+ // Prefix increment
+ // Special case for bool: set to true and return true
+ // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
+ ++b;
+ // Currently, we have no variant of atomicrmw that returns the new value, so
+ // we have to generate an atomic add, which returns the old value, and then a
+ // non-atomic add.
+ // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
+ // CHECK: add i32
+ ++i;
+ // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
+ // CHECK: add i64
+ ++l;
+ // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
+ // CHECK: add i16
+ ++s;
+}
+// CHECK: testdec
+void testdec(void)
+{
+ // CHECK: cmpxchg i8* @b
+ b--;
+ // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
+ i--;
+ // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
+ l--;
+ // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
+ s--;
+ // CHECK: cmpxchg i8* @b
+ --b;
+ // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
+ // CHECK: sub i32
+ --i;
+ // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
+ // CHECK: sub i64
+ --l;
+ // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
+ // CHECK: sub i16
+ --s;
+}
+// CHECK: testaddeq
+void testaddeq(void)
+{
+ // CHECK: cmpxchg i8* @b
+ // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
+ // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
+ // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
+ b += 42;
+ i += 42;
+ l += 42;
+ s += 42;
+}
+// CHECK: testsubeq
+void testsubeq(void)
+{
+ // CHECK: cmpxchg i8* @b
+ // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
+ // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
+ // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
+ b -= 42;
+ i -= 42;
+ l -= 42;
+ s -= 42;
+}
+// CHECK: testxoreq
+void testxoreq(void)
+{
+ // CHECK: cmpxchg i8* @b
+ // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
+ // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
+ // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
+ b ^= 42;
+ i ^= 42;
+ l ^= 42;
+ s ^= 42;
+}
+// CHECK: testoreq
+void testoreq(void)
+{
+ // CHECK: cmpxchg i8* @b
+ // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
+ // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
+ // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
+ b |= 42;
+ i |= 42;
+ l |= 42;
+ s |= 42;
+}
+// CHECK: testandeq
+void testandeq(void)
+{
+ // CHECK: cmpxchg i8* @b
+ // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
+ // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
+ // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
+ b &= 42;
+ i &= 42;
+ l &= 42;
+ s &= 42;
+}
+
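
For reference (editorial note, not part of the new test), the _Bool cases above
do not follow the plain integer pattern; the shapes the CHECK lines expect boil
down to:

    _Atomic(_Bool) flag;

    void flip(void)
    {
      flag++;  /* atomicrmw xchg i8* @flag, i8 1 seq_cst -- store true, return old value */
      ++flag;  /* store atomic i8 1, i8* @flag seq_cst   -- result is simply true        */
      flag--;  /* no special case: decrement of an atomic _Bool keeps the cmpxchg loop   */
    }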