diff options
-rw-r--r-- | docs/LangRef.html | 249 | ||||
-rw-r--r-- | include/llvm-c/Core.h | 6 | ||||
-rw-r--r-- | include/llvm/Bitcode/LLVMBitCodes.h | 25 | ||||
-rw-r--r-- | include/llvm/Instruction.def | 64 | ||||
-rw-r--r-- | include/llvm/Instructions.h | 253 | ||||
-rw-r--r-- | include/llvm/Support/InstVisitor.h | 2 | ||||
-rw-r--r-- | lib/AsmParser/LLLexer.cpp | 5 | ||||
-rw-r--r-- | lib/AsmParser/LLParser.cpp | 97 | ||||
-rw-r--r-- | lib/AsmParser/LLParser.h | 2 | ||||
-rw-r--r-- | lib/AsmParser/LLToken.h | 6 | ||||
-rw-r--r-- | lib/Bitcode/Reader/BitcodeReader.cpp | 59 | ||||
-rw-r--r-- | lib/Bitcode/Writer/BitcodeWriter.cpp | 39 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 2 | ||||
-rw-r--r-- | lib/VMCore/AsmWriter.cpp | 25 | ||||
-rw-r--r-- | lib/VMCore/Instruction.cpp | 2 | ||||
-rw-r--r-- | lib/VMCore/Instructions.cpp | 111 | ||||
-rw-r--r-- | lib/VMCore/Verifier.cpp | 36 |
18 files changed, 946 insertions, 43 deletions
diff --git a/docs/LangRef.html b/docs/LangRef.html index 40affb7e91..0c07f12ecf 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -54,6 +54,7 @@ <li><a href="#pointeraliasing">Pointer Aliasing Rules</a></li> <li><a href="#volatile">Volatile Memory Accesses</a></li> <li><a href="#memmodel">Memory Model for Concurrent Operations</a></li> + <li><a href="#ordering">Atomic Memory Ordering Constraints</a></li> </ol> </li> <li><a href="#typesystem">Type System</a> @@ -168,10 +169,12 @@ </li> <li><a href="#memoryops">Memory Access and Addressing Operations</a> <ol> - <li><a href="#i_alloca">'<tt>alloca</tt>' Instruction</a></li> - <li><a href="#i_load">'<tt>load</tt>' Instruction</a></li> - <li><a href="#i_store">'<tt>store</tt>' Instruction</a></li> - <li><a href="#i_fence">'<tt>fence</tt>' Instruction</a></li> + <li><a href="#i_alloca">'<tt>alloca</tt>' Instruction</a></li> + <li><a href="#i_load">'<tt>load</tt>' Instruction</a></li> + <li><a href="#i_store">'<tt>store</tt>' Instruction</a></li> + <li><a href="#i_fence">'<tt>fence</tt>' Instruction</a></li> + <li><a href="#i_cmpxchg">'<tt>cmpxchg</tt>' Instruction</a></li> + <li><a href="#i_atomicrmw">'<tt>atomicrmw</tt>' Instruction</a></li> <li><a href="#i_getelementptr">'<tt>getelementptr</tt>' Instruction</a></li> </ol> </li> @@ -1500,8 +1503,9 @@ that</p> <li>When a <i>synchronizes-with</i> <tt>b</tt>, includes an edge from <tt>a</tt> to <tt>b</tt>. <i>Synchronizes-with</i> pairs are introduced by platform-specific techniques, like pthread locks, thread - creation, thread joining, etc., and by the atomic operations described - in the <a href="#int_atomics">Atomic intrinsics</a> section.</li> + creation, thread joining, etc., and by atomic instructions. + (See also <a href="#ordering">Atomic Memory Ordering Constraints</a>). 
+ </li> </ul> <p>Note that program order does not introduce <i>happens-before</i> edges @@ -1536,8 +1540,9 @@ any write to the same byte, except:</p> write.</li> <li>Otherwise, if <var>R</var> is atomic, and all the writes <var>R<sub>byte</sub></var> may see are atomic, it chooses one of the - values written. See the <a href="#int_atomics">Atomic intrinsics</a> - section for additional guarantees on how the choice is made. + values written. See the <a href="#ordering">Atomic Memory Ordering + Constraints</a> section for additional constraints on how the choice + is made. <li>Otherwise <var>R<sub>byte</sub></var> returns <tt>undef</tt>.</li> </ul> @@ -1569,6 +1574,82 @@ as if it writes to the relevant surrounding bytes. </div> +<!-- ======================================================================= --> +<div class="doc_subsection"> + <a name="ordering">Atomic Memory Ordering Constraints</a> +</div> + +<div class="doc_text"> + +<p>Atomic instructions (<a href="#i_cmpxchg"><code>cmpxchg</code></a>, +<a href="#i_atomicrmw"><code>atomicrmw</code></a>, and +<a href="#i_fence"><code>fence</code></a>) take an ordering parameter +that determines which other atomic instructions on the same address they +<i>synchronize with</i>. These semantics are borrowed from Java and C++0x, +but are somewhat more colloquial. If these descriptions aren't precise enough, +check those specs. <a href="#i_fence"><code>fence</code></a> instructions +treat these orderings somewhat differently since they don't take an address. +See that instruction's documentation for details.</p> + +<!-- FIXME Note atomic load+store here once those get added. --> + +<dl> +<!-- FIXME: unordered is intended to be used for atomic load and store; +it isn't allowed for any instruction yet. --> +<dt><code>unordered</code></dt> +<dd>The set of values that can be read is governed by the happens-before +partial order. A value cannot be read unless some operation wrote it. 
+This is intended to provide a guarantee strong enough to model Java's +non-volatile shared variables. This ordering cannot be specified for +read-modify-write operations; it is not strong enough to make them atomic +in any interesting way.</dd> +<dt><code>monotonic</code></dt> +<dd>In addition to the guarantees of <code>unordered</code>, there is a single +total order for modifications by <code>monotonic</code> operations on each +address. All modification orders must be compatible with the happens-before +order. There is no guarantee that the modification orders can be combined to +a global total order for the whole program (and this often will not be +possible). The read in an atomic read-modify-write operation +(<a href="#i_cmpxchg"><code>cmpxchg</code></a> and +<a href="#i_atomicrmw"><code>atomicrmw</code></a>) +reads the value in the modification order immediately before the value it +writes. If one atomic read happens before another atomic read of the same +address, the later read must see the same value or a later value in the +address's modification order. This disallows reordering of +<code>monotonic</code> (or stronger) operations on the same address. If an +address is written <code>monotonic</code>ally by one thread, and other threads +<code>monotonic</code>ally read that address repeatedly, the other threads must +eventually see the write. 
This is intended to model C++'s relaxed +variables.</dd> +<dt><code>acquire</code></dt> +<dd>In addition to the guarantees of <code>monotonic</code>, if this operation +reads a value written by a <code>release</code> atomic operation, it +<i>synchronizes-with</i> that operation.</dd> +<dt><code>release</code></dt> +<dd>In addition to the guarantees of <code>monotonic</code>, +a <i>synchronizes-with</i> edge may be formed by an <code>acquire</code> +operation.</dd> +<dt><code>acq_rel</code> (acquire+release)</dt><dd>Acts as both an +<code>acquire</code> and <code>release</code> operation on its address.</dd> +<dt><code>seq_cst</code> (sequentially consistent)</dt> +<dd>In addition to the guarantees of <code>acq_rel</code> +(<code>acquire</code> for an operation which only reads, <code>release</code> +for an operation which only writes), there is a global total order on all +sequentially-consistent operations on all addresses, which is consistent with +the <i>happens-before</i> partial order and with the modification orders of +all the affected addresses. Each sequentially-consistent read sees the last +preceding write to the same address in this global order. This is intended +to model C++'s sequentially-consistent atomic variables and Java's volatile +shared variables.</dd> +</dl> + +<p id="singlethread">If an atomic operation is marked <code>singlethread</code>, +it only <i>synchronizes with</i> or participates in modification and seq_cst +total orderings with other operations running in the same thread (for example, +in signal handlers).</p> + +</div> + </div> <!-- *********************************************************************** --> @@ -4642,6 +4723,158 @@ thread. 
(This is useful for interacting with signal handlers.)</p> </div> <!-- _______________________________________________________________________ --> +<div class="doc_subsubsection"> <a name="i_cmpxchg">'<tt>cmpxchg</tt>' +Instruction</a> </div> + +<div class="doc_text"> + +<h5>Syntax:</h5> +<pre> + [volatile] cmpxchg <ty>* <pointer>, <ty> <cmp>, <ty> <new> [singlethread] <ordering> <i>; yields {ty}</i> +</pre> + +<h5>Overview:</h5> +<p>The '<tt>cmpxchg</tt>' instruction is used to atomically modify memory. +It loads a value in memory and compares it to a given value. If they are +equal, it stores a new value into the memory.</p> + +<h5>Arguments:</h5> +<p>There are three arguments to the '<code>cmpxchg</code>' instruction: an +address to operate on, a value to compare to the value currently at that +address, and a new value to place at that address if the compared values are +equal. The type of '<var><cmp></var>' must be an integer type whose +bit width is a power of two greater than or equal to eight and less than +or equal to a target-specific size limit. '<var><cmp></var>' and +'<var><new></var>' must have the same type, and the type of +'<var><pointer></var>' must be a pointer to that type. If the +<code>cmpxchg</code> is marked as <code>volatile</code>, then the +optimizer is not allowed to modify the number or order of execution +of this <code>cmpxchg</code> with other <a href="#volatile">volatile +operations</a>.</p> + +<!-- FIXME: Extend allowed types. --> + +<p>The <a href="#ordering"><var>ordering</var></a> argument specifies how this +<code>cmpxchg</code> synchronizes with other atomic operations.</p> + +<p>The optional "<code>singlethread</code>" argument declares that the +<code>cmpxchg</code> is only atomic with respect to code (usually signal +handlers) running in the same thread as the <code>cmpxchg</code>. 
Otherwise the +cmpxchg is atomic with respect to all other code in the system.</p> + +<p>The pointer passed into cmpxchg must have alignment greater than or equal to +the size in memory of the operand. + +<h5>Semantics:</h5> +<p>The contents of memory at the location specified by the +'<tt><pointer></tt>' operand is read and compared to +'<tt><cmp></tt>'; if the read value is equal, +'<tt><new></tt>' is written. The original value at the location +is returned. + +<p>A successful <code>cmpxchg</code> is a read-modify-write instruction for the +purpose of identifying <a href="#release_sequence">release sequences</a>. A +failed <code>cmpxchg</code> is equivalent to an atomic load with an ordering +parameter determined by dropping any <code>release</code> part of the +<code>cmpxchg</code>'s ordering.</p> + +<!-- +FIXME: Is compare_exchange_weak() necessary? (Consider after we've done +optimization work on ARM.) + +FIXME: Is a weaker ordering constraint on failure helpful in practice? +--> + +<h5>Example:</h5> +<pre> +entry: + %orig = atomic <a href="#i_load">load</a> i32* %ptr unordered <i>; yields {i32}</i> + <a href="#i_br">br</a> label %loop + +loop: + %cmp = <a href="#i_phi">phi</a> i32 [ %orig, %entry ], [%old, %loop] + %squared = <a href="#i_mul">mul</a> i32 %cmp, %cmp + %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared <i>; yields {i32}</i> + %success = <a href="#i_icmp">icmp</a> eq i32 %cmp, %old + <a href="#i_br">br</a> i1 %success, label %done, label %loop + +done: + ... 
+</pre> + +</div> + +<!-- _______________________________________________________________________ --> +<div class="doc_subsubsection"> <a name="i_atomicrmw">'<tt>atomicrmw</tt>' +Instruction</a> </div> + +<div class="doc_text"> + +<h5>Syntax:</h5> +<pre> + [volatile] atomicrmw <operation> <ty>* <pointer>, <ty> <value> [singlethread] <ordering> <i>; yields {ty}</i> +</pre> + +<h5>Overview:</h5> +<p>The '<tt>atomicrmw</tt>' instruction is used to atomically modify memory.</p> + +<h5>Arguments:</h5> +<p>There are three arguments to the '<code>atomicrmw</code>' instruction: an +operation to apply, an address whose value to modify, an argument to the +operation. The operation must be one of the following keywords:</p> +<ul> + <li>xchg</li> + <li>add</li> + <li>sub</li> + <li>and</li> + <li>nand</li> + <li>or</li> + <li>xor</li> + <li>max</li> + <li>min</li> + <li>umax</li> + <li>umin</li> +</ul> + +<p>The type of '<var><value></var>' must be an integer type whose +bit width is a power of two greater than or equal to eight and less than +or equal to a target-specific size limit. The type of the +'<code><pointer></code>' operand must be a pointer to that type. +If the <code>atomicrmw</code> is marked as <code>volatile</code>, then the +optimizer is not allowed to modify the number or order of execution of this +<code>atomicrmw</code> with other <a href="#volatile">volatile + operations</a>.</p> + +<!-- FIXME: Extend allowed types. --> + +<h5>Semantics:</h5> +<p>The contents of memory at the location specified by the +'<tt><pointer></tt>' operand are atomically read, modified, and written +back. The original value at the location is returned. 
The modification is +specified by the <var>operation</var> argument:</p> + +<ul> + <li>xchg: <code>*ptr = val</code></li> + <li>add: <code>*ptr = *ptr + val</code></li> + <li>sub: <code>*ptr = *ptr - val</code></li> + <li>and: <code>*ptr = *ptr & val</code></li> + <li>nand: <code>*ptr = ~(*ptr & val)</code></li> + <li>or: <code>*ptr = *ptr | val</code></li> + <li>xor: <code>*ptr = *ptr ^ val</code></li> + <li>max: <code>*ptr = *ptr > val ? *ptr : val</code> (using a signed comparison)</li> + <li>min: <code>*ptr = *ptr < val ? *ptr : val</code> (using a signed comparison)</li> + <li>umax: <code>*ptr = *ptr > val ? *ptr : val</code> (using an unsigned comparison)</li> + <li>umin: <code>*ptr = *ptr < val ? *ptr : val</code> (using an unsigned comparison)</li> +</ul> + +<h5>Example:</h5> +<pre> + %old = atomicrmw add i32* %ptr, i32 1 acquire <i>; yields {i32}</i> +</pre> + +</div> + +<!-- _______________________________________________________________________ --> <h4> <a name="i_getelementptr">'<tt>getelementptr</tt>' Instruction</a> </h4> diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 74599100e1..69996074f2 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -187,10 +187,12 @@ typedef enum { /* Atomic operators */ LLVMFence = 55, + LLVMAtomicCmpXchg = 56, + LLVMAtomicRMW = 57, /* Exception Handling Operators */ - LLVMLandingPad = 56, - LLVMResume = 57 + LLVMLandingPad = 58, + LLVMResume = 59 } LLVMOpcode; diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 0f74f633ea..71512166d4 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -205,6 +205,23 @@ namespace bitc { BINOP_XOR = 12 }; + /// These are values used in the bitcode files to encode AtomicRMW operations. + /// The values of these enums have no fixed relation to the LLVM IR enum + /// values. Changing these will break compatibility with old files. 
+ enum RMWOperations { + RMW_XCHG = 0, + RMW_ADD = 1, + RMW_SUB = 2, + RMW_AND = 3, + RMW_NAND = 4, + RMW_OR = 5, + RMW_XOR = 6, + RMW_MAX = 7, + RMW_MIN = 8, + RMW_UMAX = 9, + RMW_UMIN = 10 + }; + /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing /// OverflowingBinaryOperator's SubclassOptionalData contents. enum OverflowingBinaryOperatorOptionalFlags { @@ -285,7 +302,13 @@ namespace bitc { FUNC_CODE_DEBUG_LOC = 35, // DEBUG_LOC: [Line,Col,ScopeVal, IAVal] FUNC_CODE_INST_FENCE = 36, // FENCE: [ordering, synchscope] - FUNC_CODE_INST_LANDINGPAD = 37 // LANDINGPAD: [ty,val,val,num,id0,val0...] + FUNC_CODE_INST_LANDINGPAD = 37, // LANDINGPAD: [ty,val,val,num,id0,val0...] + FUNC_CODE_INST_CMPXCHG = 38, // CMPXCHG: [ptrty,ptr,cmp,new, align, vol, + // ordering, synchscope] + FUNC_CODE_INST_ATOMICRMW = 39 // ATOMICRMW: [ptrty,ptr,val, operation, + // align, vol, + // ordering, synchscope] + }; } // End bitc namespace } // End llvm namespace diff --git a/include/llvm/Instruction.def b/include/llvm/Instruction.def index a68601fbcd..d36e4be1d9 100644 --- a/include/llvm/Instruction.def +++ b/include/llvm/Instruction.def @@ -135,43 +135,45 @@ HANDLE_MEMORY_INST(28, Load , LoadInst ) // Memory manipulation instrs HANDLE_MEMORY_INST(29, Store , StoreInst ) HANDLE_MEMORY_INST(30, GetElementPtr, GetElementPtrInst) HANDLE_MEMORY_INST(31, Fence , FenceInst ) - LAST_MEMORY_INST(31) +HANDLE_MEMORY_INST(32, AtomicCmpXchg , AtomicCmpXchgInst ) +HANDLE_MEMORY_INST(33, AtomicRMW , AtomicRMWInst ) + LAST_MEMORY_INST(33) // Cast operators ... // NOTE: The order matters here because CastInst::isEliminableCastPair // NOTE: (see Instructions.cpp) encodes a table based on this ordering. 
- FIRST_CAST_INST(33) -HANDLE_CAST_INST(33, Trunc , TruncInst ) // Truncate integers -HANDLE_CAST_INST(34, ZExt , ZExtInst ) // Zero extend integers -HANDLE_CAST_INST(35, SExt , SExtInst ) // Sign extend integers -HANDLE_CAST_INST(36, FPToUI , FPToUIInst ) // floating point -> UInt -HANDLE_CAST_INST(37, FPToSI , FPToSIInst ) // floating point -> SInt -HANDLE_CAST_INST(38, UIToFP , UIToFPInst ) // UInt -> floating point -HANDLE_CAST_INST(39, SIToFP , SIToFPInst ) // SInt -> floating point -HANDLE_CAST_INST(40, FPTrunc , FPTruncInst ) // Truncate floating point -HANDLE_CAST_INST(41, FPExt , FPExtInst ) // Extend floating point -HANDLE_CAST_INST(42, PtrToInt, PtrToIntInst) // Pointer -> Integer -HANDLE_CAST_INST(43, IntToPtr, IntToPtrInst) // Integer -> Pointer -HANDLE_CAST_INST(44, BitCast , BitCastInst ) // Type cast - LAST_CAST_INST(44) + FIRST_CAST_INST(34) +HANDLE_CAST_INST(34, Trunc , TruncInst ) // Truncate integers +HANDLE_CAST_INST(35, ZExt , ZExtInst ) // Zero extend integers +HANDLE_CAST_INST(36, SExt , SExtInst ) // Sign extend integers +HANDLE_CAST_INST(37, FPToUI , FPToUIInst ) // floating point -> UInt +HANDLE_CAST_INST(38, FPToSI , FPToSIInst ) // floating point -> SInt +HANDLE_CAST_INST(39, UIToFP , UIToFPInst ) // UInt -> floating point +HANDLE_CAST_INST(40, SIToFP , SIToFPInst ) // SInt -> floating point +HANDLE_CAST_INST(41, FPTrunc , FPTruncInst ) // Truncate floating point +HANDLE_CAST_INST(42, FPExt , FPExtInst ) // Extend floating point +HANDLE_CAST_INST(43, PtrToInt, PtrToIntInst) // Pointer -> Integer +HANDLE_CAST_INST(44, IntToPtr, IntToPtrInst) // Integer -> Pointer +HANDLE_CAST_INST(45, BitCast , BitCastInst ) // Type cast + LAST_CAST_INST(45) // Other operators... - FIRST_OTHER_INST(45) -HANDLE_OTHER_INST(45, ICmp , ICmpInst ) // Integer comparison instruction -HANDLE_OTHER_INST(46, FCmp , FCmpInst ) // Floating point comparison instr. 
-HANDLE_OTHER_INST(47, PHI , PHINode ) // PHI node instruction -HANDLE_OTHER_INST(48, Call , CallInst ) // Call a function -HANDLE_OTHER_INST(49, Select , SelectInst ) // select instruction -HANDLE_OTHER_INST(50, UserOp1, Instruction) // May be used internally in a pass -HANDLE_OTHER_INST(51, UserOp2, Instruction) // Internal to passes only -HANDLE_OTHER_INST(52, VAArg , VAArgInst ) // vaarg instruction -HANDLE_OTHER_INST(53, ExtractElement, ExtractElementInst)// extract from vector -HANDLE_OTHER_INST(54, InsertElement, InsertElementInst) // insert into vector -HANDLE_OTHER_INST(55, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. -HANDLE_OTHER_INST(56, ExtractValue, ExtractValueInst)// extract from aggregate -HANDLE_OTHER_INST(57, InsertValue, InsertValueInst) // insert into aggregate -HANDLE_OTHER_INST(58, LandingPad, LandingPadInst) // Landing pad instruction. - LAST_OTHER_INST(58) + FIRST_OTHER_INST(46) +HANDLE_OTHER_INST(46, ICmp , ICmpInst ) // Integer comparison instruction +HANDLE_OTHER_INST(47, FCmp , FCmpInst ) // Floating point comparison instr. +HANDLE_OTHER_INST(48, PHI , PHINode ) // PHI node instruction +HANDLE_OTHER_INST(49, Call , CallInst ) // Call a function +HANDLE_OTHER_INST(50, Select , SelectInst ) // select instruction +HANDLE_OTHER_INST(51, UserOp1, Instruction) // May be used internally in a pass +HANDLE_OTHER_INST(52, UserOp2, Instruction) // Internal to passes only +HANDLE_OTHER_INST(53, VAArg , VAArgInst ) // vaarg instruction +HANDLE_OTHER_INST(54, ExtractElement, ExtractElementInst)// extract from vector +HANDLE_OTHER_INST(55, InsertElement, InsertElementInst) // insert into vector +HANDLE_OTHER_INST(56, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. +HANDLE_OTHER_INST(57, ExtractValue, ExtractValueInst)// extract from aggregate +HANDLE_OTHER_INST(58, InsertValue, InsertValueInst) // insert into aggregate +HANDLE_OTHER_INST(59, LandingPad, LandingPadInst) // Landing pad instruction. 
+ LAST_OTHER_INST(59) #undef FIRST_TERM_INST #undef HANDLE_TERM_INST diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h index 3e250d8527..cb21e6364b 100644 --- a/include/llvm/Instructions.h +++ b/include/llvm/Instructions.h @@ -362,6 +362,259 @@ private: }; //===----------------------------------------------------------------------===// +// AtomicCmpXchgInst Class +//===----------------------------------------------------------------------===// + +/// AtomicCmpXchgInst - an instruction that atomically checks whether a +/// specified value is in a memory location, and, if it is, stores a new value +/// there. Returns the value that was loaded. +/// +class AtomicCmpXchgInst : public Instruction { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + void Init(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, SynchronizationScope SynchScope); +protected: + virtual AtomicCmpXchgInst *clone_impl() const; +public: + // allocate space for exactly three operands + void *operator new(size_t s) { + return User::operator new(s, 3); + } + AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, SynchronizationScope SynchScope, + Instruction *InsertBefore = 0); + AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, SynchronizationScope SynchScope, + BasicBlock *InsertAtEnd); + + /// isVolatile - Return true if this is a cmpxchg from a volatile memory + /// location. + /// + bool isVolatile() const { + return getSubclassDataFromInstruction() & 1; + } + + /// setVolatile - Specify whether this is a volatile cmpxchg. + /// + void setVolatile(bool V) { + setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) | + (unsigned)V); + } + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + /// Set the ordering constraint on this cmpxchg. 
+ void setOrdering(AtomicOrdering Ordering) { + assert(Ordering != NotAtomic && + "CmpXchg instructions can only be atomic."); + setInstructionSubclassData((getSubclassDataFromInstruction() & 3) | + (Ordering << 2)); + } + + /// Specify whether this cmpxchg is atomic and orders other operations with + /// respect to all concurrently executing threads, or only with respect to + /// signal handlers executing in the same thread. + void setSynchScope(SynchronizationScope SynchScope) { + setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) | + (SynchScope << 1)); + } + + /// Returns the ordering constraint on this cmpxchg. + AtomicOrdering getOrdering() const { + return AtomicOrdering(getSubclassDataFromInstruction() >> 2); + } + + /// Returns whether this cmpxchg is atomic between threads or only within a + /// single thread. + SynchronizationScope getSynchScope() const { + return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1); + } + + Value *getPointerOperand() { return getOperand(0); } + const Value *getPointerOperand() const { return getOperand(0); } + static unsigned getPointerOperandIndex() { return 0U; } + + Value *getCompareOperand() { return getOperand(1); } + const Value *getCompareOperand() const { return getOperand(1); } + + Value *getNewValOperand() { return getOperand(2); } + const Value *getNewValOperand() const { return getOperand(2); } + + unsigned getPointerAddressSpace() const { + return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace(); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const AtomicCmpXchgInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::AtomicCmpXchg; + } + static inline bool classof(const Value *V) { + return isa<Instruction>(V) && classof(cast<Instruction>(V)); + } +private: + // Shadow Instruction::setInstructionSubclassData with a private forwarding + // 
method so that subclasses cannot accidentally use it. + void setInstructionSubclassData(unsigned short D) { + Instruction::setInstructionSubclassData(D); + } +}; + +template <> +struct OperandTraits<AtomicCmpXchgInst> : + public FixedNumOperandTraits<AtomicCmpXchgInst, 3> { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicCmpXchgInst, Value) + +//===----------------------------------------------------------------------===// +// AtomicRMWInst Class +//===----------------------------------------------------------------------===// + +/// AtomicRMWInst - an instruction that atomically reads a memory location, +/// combines it with another value, and then stores the result back. Returns +/// the old value. +/// +class AtomicRMWInst : public Instruction { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT +protected: + virtual AtomicRMWInst *clone_impl() const; +public: + /// This enumeration lists the possible modifications atomicrmw can make. In + /// the descriptions, 'p' is the pointer to the instruction's memory location, + /// 'old' is the initial value of *p, and 'v' is the other value passed to the + /// instruction. These instructions always return 'old'. + enum BinOp { + /// *p = v + Xchg, + /// *p = old + v + Add, + /// *p = old - v + Sub, + /// *p = old & v + And, + /// *p = ~old & v + Nand, + /// *p = old | v + Or, + /// *p = old ^ v + Xor, + /// *p = old >signed v ? old : v + Max, + /// *p = old <signed v ? old : v + Min, + /// *p = old >unsigned v ? old : v + UMax, + /// *p = old <unsigned v ? 
old : v + UMin, + + FIRST_BINOP = Xchg, + LAST_BINOP = UMin, + BAD_BINOP + }; + + // allocate space for exactly two operands + void *operator new(size_t s) { + return User::operator new(s, 2); + } + AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, + AtomicOrdering Ordering, SynchronizationScope SynchScope, + Instruction *InsertBefore = 0); + AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, + AtomicOrdering Ordering, SynchronizationScope SynchScope, + BasicBlock *InsertAtEnd); + + BinOp getOperation() const { + return static_cast<BinOp>(getSubclassDataFromInstruction() >> 5); + } + + void setOperation(BinOp Operation) { + unsigned short SubclassData = getSubclassDataFromInstruction(); + setInstructionSubclassData((SubclassData & 31) | + (Operation << 5)); + } + + /// isVolatile - Return true if this is a RMW on a volatile memory location. + /// + bool isVolatile() const { + return getSubclassDataFromInstruction() & 1; + } + + /// setVolatile - Specify whether this is a volatile RMW or not. + /// + void setVolatile(bool V) { + setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) | + (unsigned)V); + } + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + /// Set the ordering constraint on this RMW. + void setOrdering(AtomicOrdering Ordering) { + assert(Ordering != NotAtomic && + "atomicrmw instructions can only be atomic."); + setInstructionSubclassData((getSubclassDataFromInstruction() & ~28) | + (Ordering << 2)); + } + + /// Specify whether this RMW orders other operations with respect to all + /// concurrently executing threads, or only with respect to signal handlers + /// executing in the same thread. + void setSynchScope(SynchronizationScope SynchScope) { + setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) | + (SynchScope << 1)); + } + + /// Returns the ordering constraint on this RMW. 
+ AtomicOrdering getOrdering() const { + return AtomicOrdering((getSubclassDataFromInstruction() & 28) >> 2); + } + + /// Returns whether this RMW is atomic between threads or only within a + /// single thread. + SynchronizationScope getSynchScope() const { + return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1); + } + + Value *getPointerOperand() { return getOperand(0); } + const Value *getPointerOperand() const { return getOperand(0); } + static unsigned getPointerOperandIndex() { return 0U; } + + Value *getValOperand() { return getOperand(1); } + const Value *getValOperand() const { return getOperand(1); } + + unsigned getPointerAddressSpace() const { + return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace(); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const AtomicRMWInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::AtomicRMW; + } + static inline bool classof(const Value *V) { + return isa<Instruction>(V) && classof(cast<Instruction>(V)); + } +private: + void Init(BinOp Operation, Value *Ptr, Value *Val, + AtomicOrdering Ordering, SynchronizationScope SynchScope); + // Shadow Instruction::setInstructionSubclassData with a private forwarding + // method so that subclasses cannot accidentally use it. 
+ void setInstructionSubclassData(unsigned short D) { + Instruction::setInstructionSubclassData(D); + } +}; + +template <> +struct OperandTraits<AtomicRMWInst> + : public FixedNumOperandTraits<AtomicRMWInst,2> { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicRMWInst, Value) + +//===----------------------------------------------------------------------===// // GetElementPtrInst Class //===----------------------------------------------------------------------===// diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/Support/InstVisitor.h index 85e6f62903..a661c4fac6 100644 --- a/include/llvm/Support/InstVisitor.h +++ b/include/llvm/Support/InstVisitor.h @@ -170,6 +170,8 @@ public: RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(Instruction); } RetTy visitLoadInst(LoadInst &I) { DELEGATE(Instruction); } RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction); } + RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I){ DELEGATE(Instruction); } + RetTy visitAtomicRMWInst(AtomicRMWInst &I) { DELEGATE(Instruction); } RetTy visitFenceInst(FenceInst &I) { DELEGATE(Instruction); } RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction); } RetTy visitPHINode(PHINode &I) { DELEGATE(Instruction); } diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 970d7aa7ed..d16cac1af2 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -579,6 +579,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); + KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax); + KEYWORD(umin); + KEYWORD(x); KEYWORD(blockaddress); @@ -645,6 +648,8 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(alloca, Alloca); INSTKEYWORD(load, Load); INSTKEYWORD(store, Store); + INSTKEYWORD(cmpxchg, AtomicCmpXchg); + INSTKEYWORD(atomicrmw, AtomicRMW); INSTKEYWORD(fence, Fence); INSTKEY |