diff options
author | Dale Johannesen <dalej@apple.com> | 2008-10-02 18:53:47 +0000 |
---|---|---|
committer | Dale Johannesen <dalej@apple.com> | 2008-10-02 18:53:47 +0000 |
commit | 48c1bc2ace6481d3272ab5c18e1f19352c563be8 (patch) | |
tree | 415c0f43e5c715afbe4b5209c9da1fadbe232080 | |
parent | 46f7a5ec1af7e991f5dae6625b0d187ab62ad511 (diff) |
Handle some 64-bit atomics on x86-32, some of the time.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@56963 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 27 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 33 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 237 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 20 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 47 | ||||
-rw-r--r-- | test/CodeGen/X86/2008-10-02-Atomics32-2.ll | 969 | ||||
-rw-r--r-- | utils/TableGen/CodeGenInstruction.cpp | 3 |
7 files changed, 1316 insertions, 20 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 0a0a06bb00..243b21c39d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -6212,21 +6212,26 @@ void SelectionDAGLegalize::ExpandOp(SDValue Op, SDValue &Lo, SDValue &Hi){ break; } - // FIXME: should the LOAD_BIN and SWAP atomics get here too? Probably. - case ISD::ATOMIC_CMP_SWAP_8: - case ISD::ATOMIC_CMP_SWAP_16: - case ISD::ATOMIC_CMP_SWAP_32: + case ISD::ATOMIC_LOAD_ADD_64: + case ISD::ATOMIC_LOAD_SUB_64: + case ISD::ATOMIC_LOAD_AND_64: + case ISD::ATOMIC_LOAD_OR_64: + case ISD::ATOMIC_LOAD_XOR_64: + case ISD::ATOMIC_LOAD_NAND_64: + case ISD::ATOMIC_SWAP_64: case ISD::ATOMIC_CMP_SWAP_64: { - SDValue Tmp = TLI.LowerOperation(Op, DAG); - assert(Tmp.getNode() && "Node must be custom expanded!"); - ExpandOp(Tmp.getValue(0), Lo, Hi); - AddLegalizedOperand(SDValue(Node, 1), // Remember we legalized the chain. - LegalizeOp(Tmp.getValue(1))); + SDValue In2Lo, In2Hi, In2; + ExpandOp(Op.getOperand(2), In2Lo, In2Hi); + In2 = DAG.getNode(ISD::BUILD_PAIR, VT, In2Lo, In2Hi); + SDValue Result = TLI.LowerOperation( + DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0), Op.getOperand(1), In2), + DAG); + ExpandOp(Result.getValue(0), Lo, Hi); + // Remember that we legalized the chain. + AddLegalizedOperand(SDValue(Node,1), LegalizeOp(Result.getValue(1))); break; } - - // These operators cannot be expanded directly, emit them as calls to // library functions. case ISD::FP_TO_SINT: { diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index ea3dbad0ee..a60b13ef08 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -160,6 +160,7 @@ namespace { private: SDNode *Select(SDValue N); + SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); bool MatchAddress(SDValue N, X86ISelAddressMode &AM, bool isRoot = true, unsigned Depth = 0); @@ -1205,6 +1206,25 @@ SDNode *X86DAGToDAGISel::getTruncateTo8Bit(SDValue N0) { MVT::i8, N0, SRIdx, N0.getValue(1)); } +SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { + SDValue Chain = Node->getOperand(0); + SDValue In1 = Node->getOperand(1); + SDValue In2L = Node->getOperand(2); + SDValue In2H = Node->getOperand(3); + SDValue Tmp0, Tmp1, Tmp2, Tmp3; + if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3)) + return NULL; + AddToISelQueue(Tmp0); + AddToISelQueue(Tmp1); + AddToISelQueue(Tmp2); + AddToISelQueue(Tmp3); + AddToISelQueue(In2L); + AddToISelQueue(In2H); + AddToISelQueue(Chain); + SDValue LSI = CurDAG->getMemOperand(cast<MemSDNode>(In1)->getMemOperand()); + const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, In2L, In2H, LSI, Chain }; + return CurDAG->getTargetNode(Opc, MVT::i32, MVT::i32, MVT::Other, Ops, 8); +} SDNode *X86DAGToDAGISel::Select(SDValue N) { SDNode *Node = N.getNode(); @@ -1277,6 +1297,19 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { break; } + case X86ISD::ATOMOR64_DAG: + return SelectAtomic64(Node, X86::ATOMOR6432); + case X86ISD::ATOMXOR64_DAG: + return SelectAtomic64(Node, X86::ATOMXOR6432); + case X86ISD::ATOMADD64_DAG: + return SelectAtomic64(Node, X86::ATOMADD6432); + case X86ISD::ATOMSUB64_DAG: + return SelectAtomic64(Node, X86::ATOMSUB6432); + case X86ISD::ATOMNAND64_DAG: + return SelectAtomic64(Node, X86::ATOMNAND6432); + case X86ISD::ATOMAND64_DAG: + return SelectAtomic64(Node, X86::ATOMAND6432); + case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { SDValue N0 = Node->getOperand(0); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3bff205b99..f65167bd88 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -302,6 +302,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_SUB_32, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Custom); + if (!Subtarget->is64Bit()) { + setOperationAction(ISD::ATOMIC_LOAD_ADD_64, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_AND_64, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_OR_64, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_XOR_64, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_NAND_64, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_SWAP_64, MVT::i64, Custom); + } + // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion. setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); // FIXME - use subtarget debug flags @@ -6004,6 +6014,27 @@ SDNode* X86TargetLowering::ExpandATOMIC_CMP_SWAP(SDNode* Op, return DAG.getMergeValues(Vals, 2).getNode(); } +SDValue X86TargetLowering::LowerATOMIC_BINARY_64(SDValue Op, + SelectionDAG &DAG, + unsigned NewOp) { + SDNode *Node = Op.getNode(); + MVT T = Node->getValueType(0); + assert (T == MVT::i64 && "Only know how to expand i64 atomics"); + + SDValue Chain = Node->getOperand(0); + SDValue In1 = Node->getOperand(1); + assert(Node->getOperand(2).getNode()->getOpcode()==ISD::BUILD_PAIR); + SDValue In2L = Node->getOperand(2).getNode()->getOperand(0); + SDValue In2H = Node->getOperand(2).getNode()->getOperand(1); + SDValue Ops[] = { Chain, In1, In2L, In2H }; + SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); + SDValue Result = DAG.getNode(NewOp, Tys, Ops, 4); + SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)}; + SDValue ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2); + SDValue Vals[2] = { ResultVal, Result.getValue(2) }; + return SDValue(DAG.getMergeValues(Vals, 2).getNode(), 0); +} + SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { SDNode *Node = Op.getNode(); MVT T = Node->getValueType(0); @@ -6027,14 +6058,27 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Should not custom lower this!"); - case ISD::ATOMIC_CMP_SWAP_8: return LowerCMP_SWAP(Op,DAG); - case ISD::ATOMIC_CMP_SWAP_16: return LowerCMP_SWAP(Op,DAG); - case ISD::ATOMIC_CMP_SWAP_32: return LowerCMP_SWAP(Op,DAG); + case ISD::ATOMIC_CMP_SWAP_8: + case ISD::ATOMIC_CMP_SWAP_16: + case ISD::ATOMIC_CMP_SWAP_32: case ISD::ATOMIC_CMP_SWAP_64: return LowerCMP_SWAP(Op,DAG); - case ISD::ATOMIC_LOAD_SUB_8: return LowerLOAD_SUB(Op,DAG); - case ISD::ATOMIC_LOAD_SUB_16: return LowerLOAD_SUB(Op,DAG); + case ISD::ATOMIC_LOAD_SUB_8: + case ISD::ATOMIC_LOAD_SUB_16: case ISD::ATOMIC_LOAD_SUB_32: return LowerLOAD_SUB(Op,DAG); - case ISD::ATOMIC_LOAD_SUB_64: return LowerLOAD_SUB(Op,DAG); + case ISD::ATOMIC_LOAD_SUB_64: return (Subtarget->is64Bit()) ? + LowerLOAD_SUB(Op,DAG) : + LowerATOMIC_BINARY_64(Op,DAG, + X86ISD::ATOMSUB64_DAG); + case ISD::ATOMIC_LOAD_AND_64: return LowerATOMIC_BINARY_64(Op,DAG, + X86ISD::ATOMAND64_DAG); + case ISD::ATOMIC_LOAD_OR_64: return LowerATOMIC_BINARY_64(Op, DAG, + X86ISD::ATOMOR64_DAG); + case ISD::ATOMIC_LOAD_XOR_64: return LowerATOMIC_BINARY_64(Op,DAG, + X86ISD::ATOMXOR64_DAG); + case ISD::ATOMIC_LOAD_NAND_64: return LowerATOMIC_BINARY_64(Op,DAG, + X86ISD::ATOMNAND64_DAG); + case ISD::ATOMIC_LOAD_ADD_64: return LowerATOMIC_BINARY_64(Op,DAG, + X86ISD::ATOMADD64_DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); @@ -6140,6 +6184,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; + case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG"; + case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG"; + case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG"; + case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG"; + case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG"; + case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; case X86ISD::VSHL: return "X86ISD::VSHL"; @@ -6367,6 +6417,146 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, // private utility function MachineBasicBlock * +X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, + MachineBasicBlock *MBB, + unsigned regOpcL, + unsigned regOpcH, + unsigned immOpcL, + unsigned immOpcH, + bool invSrc) { + // For the atomic bitwise operator, we generate + // thisMBB (instructions are in pairs, except cmpxchg8b) + // ld t1,t2 = [bitinstr.addr] + // newMBB: + // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4) + // op t5, t6 <- out1, out2, [bitinstr.val] + // mov ECX, EBX <- t5, t6 + // mov EAX, EDX <- t1, t2 + // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit] + // mov t3, t4 <- EAX, EDX + // bz newMBB + // result in out1, out2 + // fallthrough -->nextMBB + + const TargetRegisterClass *RC = X86::GR32RegisterClass; + const unsigned LoadOpc = X86::MOV32rm; + const unsigned copyOpc = X86::MOV32rr; + const unsigned NotOpc = X86::NOT32r; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineFunction::iterator MBBIter = MBB; + ++MBBIter; + + /// First build the CFG + MachineFunction *F = MBB->getParent(); + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(MBBIter, newMBB); + F->insert(MBBIter, nextMBB); + + // Move all successors to thisMBB to nextMBB + nextMBB->transferSuccessors(thisMBB); + + // Update thisMBB to fall through to newMBB + thisMBB->addSuccessor(newMBB); + + // newMBB jumps to itself and fall through to nextMBB + newMBB->addSuccessor(nextMBB); + newMBB->addSuccessor(newMBB); + + // Insert instructions into newMBB based on incoming instruction + // There are 8 "real" operands plus 9 implicit def/uses, ignored here. + assert(bInstr->getNumOperands() < 18 && "unexpected number of operands"); + MachineOperand& dest1Oper = bInstr->getOperand(0); + MachineOperand& dest2Oper = bInstr->getOperand(1); + MachineOperand* argOpers[6]; + for (int i=0; i < 6; ++i) + argOpers[i] = &bInstr->getOperand(i+2); + + // x86 address has 4 operands: base, index, scale, and displacement + int lastAddrIndx = 3; // [0,3] + + unsigned t1 = F->getRegInfo().createVirtualRegister(RC); + MachineInstrBuilder MIB = BuildMI(thisMBB, TII->get(LoadOpc), t1); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + unsigned t2 = F->getRegInfo().createVirtualRegister(RC); + MIB = BuildMI(thisMBB, TII->get(LoadOpc), t2); + // add 4 to displacement. getImm verifies it's immediate. + for (int i=0; i <= lastAddrIndx-1; ++i) + (*MIB).addOperand(*argOpers[i]); + MachineOperand newOp3 = MachineOperand::CreateImm(argOpers[3]->getImm()+4); + (*MIB).addOperand(newOp3); + + // t3/4 are defined later, at the bottom of the loop + unsigned t3 = F->getRegInfo().createVirtualRegister(RC); + unsigned t4 = F->getRegInfo().createVirtualRegister(RC); + BuildMI(newMBB, TII->get(X86::PHI), dest1Oper.getReg()) + .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB); + BuildMI(newMBB, TII->get(X86::PHI), dest2Oper.getReg()) + .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); + + unsigned tt1 = F->getRegInfo().createVirtualRegister(RC); + unsigned tt2 = F->getRegInfo().createVirtualRegister(RC); + if (invSrc) { + MIB = BuildMI(newMBB, TII->get(NotOpc), tt1).addReg(t1); + MIB = BuildMI(newMBB, TII->get(NotOpc), tt2).addReg(t2); + } else { + tt1 = t1; + tt2 = t2; + } + + assert((argOpers[4]->isRegister() || argOpers[4]->isImmediate()) && + "invalid operand"); + unsigned t5 = F->getRegInfo().createVirtualRegister(RC); + unsigned t6 = F->getRegInfo().createVirtualRegister(RC); + if (argOpers[4]->isRegister()) + MIB = BuildMI(newMBB, TII->get(regOpcL), t5); + else + MIB = BuildMI(newMBB, TII->get(immOpcL), t5); + MIB.addReg(tt1); + (*MIB).addOperand(*argOpers[4]); + assert(argOpers[5]->isRegister() == argOpers[4]->isRegister()); + assert(argOpers[5]->isImmediate() == argOpers[4]->isImmediate()); + if (argOpers[5]->isRegister()) + MIB = BuildMI(newMBB, TII->get(regOpcH), t6); + else + MIB = BuildMI(newMBB, TII->get(immOpcH), t6); + MIB.addReg(tt2); + (*MIB).addOperand(*argOpers[5]); + + MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EAX); + MIB.addReg(t1); + MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EDX); + MIB.addReg(t2); + + MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EBX); + MIB.addReg(t5); + MIB = BuildMI(newMBB, TII->get(copyOpc), X86::ECX); + MIB.addReg(t6); + + MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG8B)); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + + assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); + (*MIB).addMemOperand(*F, *bInstr->memoperands_begin()); + + MIB = BuildMI(newMBB, TII->get(copyOpc), t3); + MIB.addReg(X86::EAX); + MIB = BuildMI(newMBB, TII->get(copyOpc), t4); + MIB.addReg(X86::EDX); + + // insert branch + BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB); + + F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + return nextMBB; +} + +// private utility function +MachineBasicBlock * X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MachineBasicBlock *MBB, unsigned cmovOpc) { @@ -6695,6 +6885,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, X86::NOT8r, X86::AL, X86::GR8RegisterClass, true); // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. + // This group is for 64-bit host. case X86::ATOMAND64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, X86::AND64ri32, X86::MOV64rm, @@ -6727,6 +6918,40 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr); case X86::ATOMUMAX64: return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr); + + // This group does 64-bit operations on a 32-bit host. + case X86::ATOMAND6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::AND32rr, X86::AND32rr, + X86::AND32ri, X86::AND32ri, + false); + case X86::ATOMOR6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::OR32rr, X86::OR32rr, + X86::OR32ri, X86::OR32ri, + false); + case X86::ATOMXOR6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::XOR32rr, X86::XOR32rr, + X86::XOR32ri, X86::XOR32ri, + false); + case X86::ATOMNAND6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::AND32rr, X86::AND32rr, + X86::AND32ri, X86::AND32ri, + true); + // FIXME carry + case X86::ATOMADD6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::ADD32rr, X86::ADC32rr, + X86::ADD32ri, X86::ADC32ri, + false); + // FIXME carry + case X86::ATOMSUB6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::SUB32rr, X86::SBB32rr, + X86::SUB32ri, X86::SBB32ri, + false); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 54a74533eb..f12ff70703 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -199,6 +199,15 @@ namespace llvm { LCMPXCHG_DAG, LCMPXCHG8_DAG, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, + // ATOMXOR64_DAG, ATOMNAND64_DAG - Atomic 64-bit binary operations. + ATOMADD64_DAG, + ATOMSUB64_DAG, + ATOMOR64_DAG, + ATOMXOR64_DAG, + ATOMAND64_DAG, + ATOMNAND64_DAG, + // FNSTCW16m - Store FP control world into i16 memory. FNSTCW16m, @@ -570,6 +579,8 @@ namespace llvm { SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG); SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG); SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG); + SDValue LowerATOMIC_BINARY_64(SDValue Op, SelectionDAG &DAG, + unsigned NewOp); SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG); SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG); SDNode *ExpandATOMIC_CMP_SWAP(SDNode *N, SelectionDAG &DAG); @@ -602,6 +613,15 @@ namespace llvm { unsigned EAXreg, TargetRegisterClass *RC, bool invSrc = false); + + MachineBasicBlock *EmitAtomicBit6432WithCustomInserter( + MachineInstr *BInstr, + MachineBasicBlock *BB, + unsigned regOpcL, + unsigned regOpcH, + unsigned immOpcL, + unsigned immOpcH, + bool invSrc = false); /// Utility function to emit atomic min and max. It takes the min/max // instruction to expand, the associated basic block, and the associated diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 6ffff6bad2..de80bb2d45 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -39,6 +39,8 @@ def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>; def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>; def SDT_X86CallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; @@ -79,7 +81,24 @@ def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, SDNPMayLoad]>; - +def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary, + [SDNPHasChain, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, [SDNPHasChain, SDNPOptInFlag]>; @@ -2630,7 +2649,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), "lock\n\tcmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; } -let Defs = [EAX, EBX, ECX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { +let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr), "lock\n\tcmpxchg8b\t$ptr", [(X86cas8 addr:$ptr)]>, TB, LOCK; @@ -2730,6 +2749,30 @@ def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>; } +let Constraints = "$val1 = $dst1, $val2 = $dst2", + Defs = [EFLAGS, EAX, EBX, ECX, EDX], + Uses = [EAX, EBX, ECX, EDX], + usesCustomDAGSchedInserter = 1 in { +def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMAND6432 PSUEDO!", []>; +def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMOR6432 PSUEDO!", []>; +def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMXOR6432 PSUEDO!", []>; +def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMNAND6432 PSUEDO!", []>; +def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMADD6432 PSUEDO!", []>; +def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMSUB6432 PSUEDO!", []>; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/2008-10-02-Atomics32-2.ll b/test/CodeGen/X86/2008-10-02-Atomics32-2.ll new file mode 100644 index 0000000000..3cf9deea6e --- /dev/null +++ b/test/CodeGen/X86/2008-10-02-Atomics32-2.ll @@ -0,0 +1,969 @@ +; RUN: llvm-as < %s | llc -march=x86 +;; This version includes 64-bit version of binary operators (in 32-bit mode). +;; Swap, cmp-and-swap not supported yet in this mode. +; ModuleID = 'Atomics.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" +@sc = common global i8 0 ; <i8*> [#uses=52] +@uc = common global i8 0 ; <i8*> [#uses=112] +@ss = common global i16 0 ; <i16*> [#uses=15] +@us = common global i16 0 ; <i16*> [#uses=15] +@si = common global i32 0 ; <i32*> [#uses=15] +@ui = common global i32 0 ; <i32*> [#uses=23] +@sl = common global i32 0 ; <i32*> [#uses=15] +@ul = common global i32 0 ; <i32*> [#uses=15] +@sll = common global i64 0, align 8 ; <i64*> [#uses=13] +@ull = common global i64 0, align 8 ; <i64*> [#uses=13] + +define void @test_op_ignore() nounwind { +entry: + %0 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @sc, i8 1) ; <i8> [#uses=0] + %1 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @uc, i8 1) ; <i8> [#uses=0] + %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16* ; <i16*> [#uses=1] + %3 = call i16 @llvm.atomic.load.add.i16.p0i16(i16* %2, i16 1) ; <i16> [#uses=0] + %4 = bitcast i8* bitcast (i16* @us to i8*) to i16* ; <i16*> [#uses=1] + %5 = call i16 @llvm.atomic.load.add.i16.p0i16(i16* %4, i16 1) ; <i16> [#uses=0] + %6 = bitcast i8* bitcast (i32* @si to i8*) to i32* ; <i32*> [#uses=1] + %7 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %6, i32 1) ; <i32> [#uses=0] + %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32* ; <i32*> [#uses=1] + %9 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %8, i32 1) ; <i32> [#uses=0] + %10 = bitcast i8* bitcast (i32* @sl to i8*) to i32* ; <i32*> [#uses=1] + %11 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %10, i32 1) ; <i32> [#uses=0] + %12 = bitcast i8* bitcast (i32* @ul to i8*) to i32* ; <i32*> [#uses=1] + %13 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %12, i32 1) ; <i32> [#uses=0] + %14 = bitcast i8* bitcast (i64* @sll to i8*) to i64* ; <i64*> [#uses=1] + %15 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %14, i64 1) ; <i64> [#uses=0] + %16 = bitcast i8* bitcast (i64* @ull to i8*) to i64* ; <i64*> [#uses=1] + %17 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %16, i64 1) ; <i64> [#uses=0] + %18 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @sc, i8 1) ; <i8> [#uses=0] + %19 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @uc, i8 1) ; <i8> [#uses=0] + %20 = bitcast i8* bitcast (i16* @ss to i8*) to i16* ; <i16*> [#uses=1] + %21 = call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %20, i16 1) ; <i16> [#uses=0] + %22 = bitcast i8* bitcast (i16* @us to i8*) to i16* ; <i16*> [#uses=1] + %23 = call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %22, i16 1) ; <i16> [#uses=0] + %24 = bitcast i8* bitcast (i32* @si to i8*) to i32* ; <i32*> [#uses=1] + %25 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %24, i32 1) ; <i32> [#uses=0] + %26 = bitcast i8* bitcast (i32* @ui to i8*) to i32* ; <i32*> [#uses=1] + %27 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %26, i32 1) ; <i32> [#uses=0] + %28 = bitcast i8* bitcast (i32* @sl to i8*) to i32* ; <i32*> [#uses=1] + %29 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %28, i32 1) ; <i32> [#uses=0] + %30 = bitcast i8* bitcast (i32* @ul to i8*) to i32* ; <i32*> [#uses=1] + %31 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %30, i32 1) ; <i32> [#uses=0] + %32 = bitcast i8* bitcast (i64* @sll to i8*) to i64* ; <i64*> [#uses=1] + %33 = call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %32, i64 1) ; <i64> [#uses=0] + %34 = bitcast i8* bitcast (i64* @ull to i8*) to i64* ; <i64*> [#uses=1] + %35 = call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %34, i64 1) ; <i64> [#uses=0] + %36 = call i8 @llvm.atomic.load.or.i8.p0i8(i8* @sc, i8 1) ; <i8> [#uses=0] + %37 = call i8 @llvm.atomic.load.or.i8.p0i8(i8* @uc, i8 1) ; <i8> [#uses=0] + %38 = bitcast i8* bitcast (i16* @ss to i8*) to i16* ; <i16*> [#uses=1] + %39 = call i16 @llvm.atomic.load.or.i16.p0i16(i16* %38, i16 1) ; <i16> [#uses=0] + %40 = bitcast i8* bitcast (i16* @us to i8*) to i16* ; <i16*> [#uses=1] + %41 = call i16 @llvm.atomic.load.or.i16.p0i16(i16* %40, i16 1) ; <i16> [#uses=0] + %42 = bitcast i8* bitcast (i32* @si to i8*) to i32* ; <i32*> [#uses=1] + %43 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %42, i32 1) ; <i32> [#uses=0] + %44 = bitcast i8* bitcast (i32* @ui to i8*) to i32* ; <i32*> [#uses=1] + %45 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %44, i32 1) ; <i32> [#uses=0] + %46 = bitcast i8* bitcast (i32* @sl to i8*) to i32* ; <i32*> [#uses=1] + %47 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %46, i32 1) ; <i32> [#uses=0] + %48 = bitcast i8* bitcast (i32* @ul to i8*) to i32* ; <i32*> [#uses=1] + %49 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %48, i32 1) ; <i32> [#uses=0] + %50 = bitcast i8* bitcast (i64* @sll to i8*) to i64* ; <i64*> [#uses=1] + %51 = call i64 @llvm.atomic.load.or.i64.p0i64(i64* %50, i64 1) ; <i64> [#uses=0] + %52 = bitcast i8* bitcast (i64* @ull to i8*) to i64* ; <i64*> [#uses=1] + %53 = call i64 @llvm.atomic.load.or.i64.p0i64(i64* %52, i64 1) ; <i64> [#uses=0] + %54 = call i8 @llvm.atomic.load.xor.i8.p0i8(i8* @sc, i8 1) ; <i8> [#uses=0] + %55 = call i8 @llvm.atomic.load.xor.i8.p0i8(i8* @uc, i8 1) ; <i8> [#uses=0] + %56 = bitcast i8* bitcast (i16* @ss to i8*) to i16* ; <i16*> [#uses=1] + %57 = call i16 @llvm.atomic.load.xor.i16.p0i16(i16* %56, i16 1) ; <i16> [#uses=0] + %58 = bitcast i8* bitcast (i16* @us to i8*) to i16* ; <i16*> [#uses=1] + %59 = call i16 @llvm.atomic.load.xor.i16.p0i16(i16* %58, i16 1) ; <i16> [#uses=0] + %60 = bitcast i8* bitcast (i32* @si to i8*) to i32* ; <i32*> [#uses=1] + %61 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %60, i32 1) ; <i32> [#uses=0] + %62 = bitcast i8* bitcast (i32* @ui to i8*) to i32* ; <i32*> [#uses=1] + %63 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %62, i32 1) ; <i32> [#uses=0] + %64 = bitcast i8* bitcast (i32* @sl to i8*) to i32* ; <i32*> [#uses=1] + %65 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %64, i32 1) ; <i32> [#uses=0] + %66 = bitcast i8* bitcast (i32* @ul to i8*) to i32* ; <i32*> [#uses=1] + %67 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %66, i32 1) ; <i32> [#uses=0] + %68 = bitcast i8* bitcast (i64* @sll to i8*) to i64* ; <i64*> [#uses=1] + %69 = call i64 @llvm.atomic.load.xor.i64.p0i64(i64* %68, i64 1) ; <i64> [#uses=0] + %70 = bitcast i8* bitcast (i64* @ull to i8*) to i64* ; <i64*> [#uses=1] + %71 = call i64 @llvm.atomic.load.xor.i64.p0i64(i64* %70, i64 1) ; <i64> [#uses=0] + %72 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @sc, i8 1) ; <i8> [#uses=0] + %73 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @uc, i8 1) ; <i8> [#uses=0] + %74 = bitcast i8* bitcast (i16* @ss to i8*) to i16* ; <i16*> [#uses=1] + %75 = call i16 @llvm.atomic.load.and.i16.p0i16(i16* %74, i16 1) ; <i16> [#uses=0] + %76 = bitcast i8* bitcast (i16* @us to i8*) to i16* ; <i16*> [#uses=1] + %77 = call i16 @llvm.atomic.load.and.i16.p0i16(i16* %76, i16 1) ; <i16> [#uses=0] + %78 = bitcast i8* bitcast (i32* @si to i8*) to i32* ; <i32*> [#uses=1] + %79 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %78, i32 1) ; <i32> [#uses=0] + %80 = bitcast i8* bitcast (i32* @ui to i8*) to i32* ; <i32*> [#uses=1] + %81 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %80, i32 1) ; <i32> [#uses=0] + %82 = bitcast i8* bitcast (i32* @sl to i8*) to i32* ; <i32*> [#uses=1] + %83 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %82, i32 1) ; <i32> [#uses=0] + %84 = bitcast i8* bitcast (i32* @ul to i8*) to i32* ; <i32*> [#uses=1] + %85 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %84, i32 1) ; <i32> [#uses=0] + %86 = bitcast i8* bitcast (i64* @sll to i8*) to i64* ; <i64*> [#uses=1] + %87 = call i64 @llvm.atomic.load.and.i64.p0i64(i64* %86, i64 1) ; <i64> [#uses=0] + %88 = bitcast i8* bitcast (i64* @ull to i8*) to i64* ; <i64*> [#uses=1] + %89 = call i64 @llvm.atomic.load.and.i64.p0i64(i64* %88, i64 1) ; <i64> [#uses=0] + %90 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @sc, i8 1) ; <i8> [#uses=0] + %91 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @uc, i8 1) ; <i8> [#uses=0] + %92 = bitcast i8* bitcast (i16* @ss to i8*) to i16* ; <i16*> [#uses=1] + %93 = call i16 @llvm.atomic.load.nand.i16.p0i16(i16* %92, i16 1) ; <i16> [#uses=0] + %94 = bitcast i8* bitcast (i16* @us to i8*) to i16* ; <i16*> [#uses=1] + %95 = call i16 @llvm.atomic.load.nand.i16.p0i16(i16* %94, i16 1) ; <i16> [#uses=0] + %96 = bitcast i8* bitcast (i32* @si to i8*) to i32* ; <i32*> [#uses=1] + %97 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %96, i32 1) ; <i32> [#uses=0] + %98 = bitcast i8* bitcast (i32* @ui to i8*) to i32* ; <i32*> [#uses=1] + %99 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %98, i32 1) ; <i32> [#uses=0] + %100 = bitcast i8* bitcast (i32* @sl to i8*) to i32* ; <i32*> [#uses=1] + %101 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %100, i32 1) ; <i32> [#uses=0] + %102 = bitcast i8* bitcast (i32* @ul to i8*) to i32* ; <i32*> [#uses=1] + %103 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %102, i32 1) ; <i32> [#uses=0] + %104 = bitcast i8* bitcast (i64* @sll to i8*) to i64* ; <i64*> [#uses=1] + %105 = call i64 @llvm.atomic.load.nand.i64.p0i64(i64* %104, i64 1) ; <i64> [#uses=0] + %106 = bitcast i8* bitcast (i64* @ull to i8*) to i64* ; <i64*> [#uses=1] + %107 = call i64 @llvm.atomic.load.nand.i64.p0i64(i64* %106, i64 1) ; <i64> [#uses=0] + br label %return + +return: ; preds = %entry + ret void +} + +declare i8 @llvm.atomic.load.add.i8.p0i8(i8*, i8) nounwind + +declare i16 @llvm.atomic.load.add.i16.p0i16(i16*, i16) nounwind + +declare i32 @llvm.atomic.load.add.i32.p0i32(i32*, i32) nounwind + +declare i64 @llvm.atomic.load.add.i64.p0i64(i64*, i64) nounwind + +declare i8 @llvm.atomic.load.sub.i8.p0i8(i8*, i8) nounwind + +declare i16 @llvm.atomic.load.sub.i16.p0i16(i16*, i16) nounwind + +declare i32 @llvm.atomic.load.sub.i32.p0i32(i32*, i32) nounwind + +declare i64 @llvm.atomic.load.sub.i64.p0i64(i64*, i64) nounwind + +declare i8 @llvm.atomic.load.or.i8.p0i8(i8*, i8) nounwind + +declare i16 @llvm.atomic.load.or.i16.p0i16(i16*, i16) nounwind + +declare i32 @llvm.atomic.load.or.i32.p0i32(i32*, i32) nounwind + +declare i64 @llvm.atomic.load.or.i64.p0i64(i64*, i64) nounwind + +declare i8 @llvm.atomic.load.xor.i8.p0i8(i8*, i8) nounwind + +declare i16 @llvm.atomic.load.xor.i16.p0i16(i16*, i16) nounwind + +declare i32 @llvm.atomic.load.xor.i32.p0i32(i32*, i32) nounwind + +declare i64 @llvm.atomic.load.xor.i64.p0i64(i64*, i64) nounwind + +declare i8 @llvm.atomic.load.and.i8.p0i8(i8*, i8) nounwind + +declare i16 @llvm.atomic.load.and.i16.p0i16(i16*, i16) nounwind + +declare i32 @llvm.atomic.load.and.i32.p0i32(i32*, i32) nounwind + +declare i64 @llvm.atomic.load.and.i64.p0i64(i64*, i64) nounwind + +declare i8 @llvm.atomic.load.nand.i8.p0i8(i8*, i8) nounwind + +declare i16 @llvm.atomic.load.nand.i16.p0i16(i16*, i16) nounwind + +declare i32 @llvm.atomic.load.nand.i32.p0i32(i32*, i32) nounwind + +declare i64 @llvm.atomic.load.nand.i64.p0i64(i64*, i64) nounwind + +define void @test_fetch_and_op() nounwind { +entry: + %0 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @sc, i8 11) ; <i8> [#uses=1] + store i8 %0, i8* @sc, align 1 + %1 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @uc, i8 11) ; <i8> [#uses=1] + store i8 %1, i8* @uc, align 1 + %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16* ; <i16*> [#uses=1] + %3 = call i16 @llvm.atomic.load.add.i16.p0i16(i16* %2, i16 11) ; <i16> [#uses=1] + store i16 %3, i16* @ss, align 2 + %4 = bitcast i8* bitcast (i16* @us to i8*) to i16* ; <i16*> [#uses=1] + %5 = call i16 @llvm.atomic.load.add.i16.p0i16(i16* %4, i16 11) ; <i16> [#uses=1] + store i16 %5, i16* @us, align 2 + %6 = bitcast i8* bitcast (i32* @si to i8*) to i32* ; <i32*> [#uses=1] + %7 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %6, i32 11) ; <i32> [#uses=1] + store i32 %7, i32* @si, align 4 + %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32* ; <i32*> [#uses=1] + %9 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %8, i32 11) ; <i32> [#uses=1] + store i32 %9, i32* @ui, align 4 + %10 = bitcast i8* bitcast (i32* @sl to i8*) to i32* ; <i32*> [#uses=1] + %11 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %10, i32 11) ; <i32> [#uses=1] + store i32 %11, i32* @sl, align 4 + %12 = bitcast i8* bitcast (i32* @ul to i8*) to i32* ; <i32*> [#uses=1] + %13 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %12, i32 11) ; <i32> [#uses=1] + store i32 %13, i32* @ul, align 4 + %14 = bitcast i8* bitcast (i64* @sll to i8*) to i64* ; <i64*> [#uses=1] + %15 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %14, i64 11) ; <i64> [#uses=1] + store i64 %15, i64* @sll, align 8 + %16 = bitcast i8* bitcast (i64* @ull to i8*) to i64* ; <i64*> [#uses=1] + %17 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %16, i64 11) ; <i64> [#uses=1] + store i64 %17, i64* @ull, align 8 + %18 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @sc, i8 11) ; <i8> [#uses=1] + store i8 %18, i8* @sc, align 1 + %19 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @uc, i8 11) ; <i8> [#uses=1] + store i8 %19, i8* @uc, align 1 + %20 = bitcast i8* bitcast (i16* @ss to i8*) to i16* ; <i16*> [#uses=1] + %21 = call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %20, i16 11) ; <i16> [#uses=1] |