1 files changed, 1445 insertions, 2328 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9b833e0fb44..52de26daa1f 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -56,14 +56,6 @@
 
 #include "drbd_vli.h"
 
-struct after_state_chg_work {
-	struct drbd_work w;
-	union drbd_state os;
-	union drbd_state ns;
-	enum chg_state_flags flags;
-	struct completion *done;
-};
-
 static DEFINE_MUTEX(drbd_main_mutex);
 int drbdd_init(struct drbd_thread *);
 int drbd_worker(struct drbd_thread *);
@@ -72,21 +64,17 @@ int drbd_asender(struct drbd_thread *);
 int drbd_init(void);
 static int drbd_open(struct block_device *bdev, fmode_t mode);
 static int drbd_release(struct gendisk *gd, fmode_t mode);
-static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
-static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
-			   union drbd_state ns, enum chg_state_flags flags);
-static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static int w_md_sync(struct drbd_work *w, int unused);
 static void md_sync_timer_fn(unsigned long data);
-static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
-static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
-static void _tl_clear(struct drbd_conf *mdev);
+static int w_bitmap_io(struct drbd_work *w, int unused);
+static int w_go_diskless(struct drbd_work *w, int unused);
 
 MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
 	      "Lars Ellenberg <lars@linbit.com>");
 MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
 MODULE_VERSION(REL_VERSION);
 MODULE_LICENSE("GPL");
-MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
 		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
 MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
 
@@ -98,7 +86,6 @@ MODULE_PARM_DESC(allow_oos, "DONT USE!");
 module_param(minor_count, uint, 0444);
 module_param(disable_sendpage, bool, 0644);
 module_param(allow_oos, bool, 0);
-module_param(cn_idx, uint, 0444);
 module_param(proc_details, int, 0644);
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
@@ -120,7 +107,6 @@ module_param(fault_devs, int, 0644);
 unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
 bool disable_sendpage;
 bool allow_oos;
-unsigned int cn_idx = CN_IDX_DRBD;
 int proc_details;       /* Detail level in proc drbd*/
 
 /* Module parameter for setting the user mode helper program
@@ -132,10 +118,11 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0
 /* in 2.6.x, our device mapping and config info contains our virtual gendisks
  * as member "struct gendisk *vdisk;"
  */
-struct drbd_conf **minor_table;
+struct idr minors;
+struct list_head drbd_tconns;  /* list of struct drbd_tconn */
 
 struct kmem_cache *drbd_request_cache;
-struct kmem_cache *drbd_ee_cache;	/* epoch entries */
+struct kmem_cache *drbd_ee_cache;	/* peer requests */
 struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
 struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
 mempool_t *drbd_request_mempool;
@@ -164,10 +151,15 @@ static const struct block_device_operations drbd_ops = {
 
 struct bio *bio_alloc_drbd(gfp_t gfp_mask)
 {
+	struct bio *bio;
+
 	if (!drbd_md_io_bio_set)
 		return bio_alloc(gfp_mask, 1);
 
-	return bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
+	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
+	if (!bio)
+		return NULL;
+	return bio;
 }
 
 #ifdef __CHECKER__
@@ -190,158 +182,87 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
 #endif
 
 /**
- * DOC: The transfer log
- *
- * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
- * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
- * of the list. There is always at least one &struct drbd_tl_epoch object.
- *
- * Each &struct drbd_tl_epoch has a circular double linked list of requests
- * attached.
- */
-static int tl_init(struct drbd_conf *mdev)
-{
-	struct drbd_tl_epoch *b;
-
-	/* during device minor initialization, we may well use GFP_KERNEL */
-	b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
-	if (!b)
-		return 0;
-	INIT_LIST_HEAD(&b->requests);
-	INIT_LIST_HEAD(&b->w.list);
-	b->next = NULL;
-	b->br_number = 4711;
-	b->n_writes = 0;
-	b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
-
-	mdev->oldest_tle = b;
-	mdev->newest_tle = b;
-	INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
-	INIT_LIST_HEAD(&mdev->barrier_acked_requests);
-
-	mdev->tl_hash = NULL;
-	mdev->tl_hash_s = 0;
-
-	return 1;
-}
-
-static void tl_cleanup(struct drbd_conf *mdev)
-{
-	D_ASSERT(mdev->oldest_tle == mdev->newest_tle);
-	D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
-	kfree(mdev->oldest_tle);
-	mdev->oldest_tle = NULL;
-	kfree(mdev->unused_spare_tle);
-	mdev->unused_spare_tle = NULL;
-	kfree(mdev->tl_hash);
-	mdev->tl_hash = NULL;
-	mdev->tl_hash_s = 0;
-}
-
-/**
- * _tl_add_barrier() - Adds a barrier to the transfer log
- * @mdev:	DRBD device.
- * @new:	Barrier to be added before the current head of the TL.
- *
- * The caller must hold the req_lock.
- */
-void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
-{
-	struct drbd_tl_epoch *newest_before;
-
-	INIT_LIST_HEAD(&new->requests);
-	INIT_LIST_HEAD(&new->w.list);
-	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
-	new->next = NULL;
-	new->n_writes = 0;
-
-	newest_before = mdev->newest_tle;
-	new->br_number = newest_before->br_number+1;
-	if (mdev->newest_tle != new) {
-		mdev->newest_tle->next = new;
-		mdev->newest_tle = new;
-	}
-}
-
-/**
- * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
- * @mdev:	DRBD device.
+ * tl_release() - mark as BARRIER_ACKED all requests in the corresponding transfer log epoch
+ * @tconn:	DRBD connection.
  * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
  * @set_size:	Expected number of requests before that barrier.
  *
  * In case the passed barrier_nr or set_size does not match the oldest
- * &struct drbd_tl_epoch objects this function will cause a termination
- * of the connection.
+ * epoch of not yet barrier-acked requests, this function will cause a
+ * termination of the connection.
  */
-void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
-		       unsigned int set_size)
+void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
+		unsigned int set_size)
 {
-	struct drbd_tl_epoch *b, *nob; /* next old barrier */
-	struct list_head *le, *tle;
 	struct drbd_request *r;
-
-	spin_lock_irq(&mdev->req_lock);
-
-	b = mdev->oldest_tle;
+	struct drbd_request *req = NULL;	/* oldest un-acked write */
+	unsigned int expect_epoch = 0;	/* unsigned, like barrier_nr */
+	unsigned int expect_size = 0;	/* unsigned, like set_size */
+
+	spin_lock_irq(&tconn->req_lock);
+
+	/* find oldest not yet barrier-acked write request,
+	 * count writes in its epoch. */
+	list_for_each_entry(r, &tconn->transfer_log, tl_requests) {
+		const unsigned s = r->rq_state;
+		if (!req) {
+			if (!(s & RQ_WRITE))
+				continue;
+			if (!(s & RQ_NET_MASK))
+				continue;
+			if (s & RQ_NET_DONE)
+				continue;
+			req = r;
+			expect_epoch = req->epoch;
+			expect_size++;
+		} else {
+			if (r->epoch != expect_epoch)
+				break;
+			if (!(s & RQ_WRITE))
+				continue;
+			/* if (s & RQ_DONE): not expected */
+			/* if (!(s & RQ_NET_MASK)): not expected */
+			expect_size++;
+		}
+	}
 
 	/* first some paranoia code */
-	if (b == NULL) {
-		dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
-			barrier_nr);
+	if (req == NULL) {
+		conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
+			 barrier_nr);
 		goto bail;
 	}
-	if (b->br_number != barrier_nr) {
-		dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n",
-			barrier_nr, b->br_number);
+	if (expect_epoch != barrier_nr) {
+		conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n",
+			 barrier_nr, expect_epoch);
 		goto bail;
 	}
-	if (b->n_writes != set_size) {
-		dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
-			barrier_nr, set_size, b->n_writes);
+
+	if (expect_size != set_size) {
+		conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
+			 barrier_nr, set_size, expect_size);
 		goto bail;
 	}
 
-	/* Clean up list of requests processed during current epoch */
-	list_for_each_safe(le, tle, &b->requests) {
-		r = list_entry(le, struct drbd_request, tl_requests);
-		_req_mod(r, barrier_acked);
-	}
-	/* There could be requests on the list waiting for completion
-	   of the write to the local disk. To avoid corruptions of
-	   slab's data structures we have to remove the lists head.
-
-	   Also there could have been a barrier ack out of sequence, overtaking
-	   the write acks - which would be a bug and violating write ordering.
-	   To not deadlock in case we lose connection while such requests are
-	   still pending, we need some way to find them for the
-	   _req_mode(connection_lost_while_pending).
-
-	   These have been list_move'd to the out_of_sequence_requests list in
-	   _req_mod(, barrier_acked) above.
-	   */
-	list_splice_init(&b->requests, &mdev->barrier_acked_requests);
-
-	nob = b->next;
-	if (drbd_test_and_clear_flag(mdev, CREATE_BARRIER)) {
-		_tl_add_barrier(mdev, b);
-		if (nob)
-			mdev->oldest_tle = nob;
-		/* if nob == NULL b was the only barrier, and becomes the new
-		   barrier. Therefore mdev->oldest_tle points already to b */
-	} else {
-		D_ASSERT(nob != NULL);
-		mdev->oldest_tle = nob;
-		kfree(b);
+	/* Clean up list of requests processed during current epoch. */
+	/* this extra list walk restart is paranoia,
+	 * to catch requests being barrier-acked "unexpectedly".
+	 * It usually should find the same req again, or some READ preceding it. */
+	list_for_each_entry(req, &tconn->transfer_log, tl_requests)
+		if (req->epoch == expect_epoch)
+			break;
+	list_for_each_entry_safe_from(req, r, &tconn->transfer_log, tl_requests) {
+		if (req->epoch != expect_epoch)
+			break;
+		_req_mod(req, BARRIER_ACKED);
 	}
-
-	spin_unlock_irq(&mdev->req_lock);
-	dec_ap_pending(mdev);
+	spin_unlock_irq(&tconn->req_lock);
 
 	return;
 
 bail:
-	spin_unlock_irq(&mdev->req_lock);
-	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+	spin_unlock_irq(&tconn->req_lock);
+	conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
 }
 
 
@@ -350,85 +271,24 @@ bail:
  * @mdev:	DRBD device.
  * @what:       The action/event to perform with all request objects
  *
- * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
- * restart_frozen_disk_io.
+ * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
+ * RESTART_FROZEN_DISK_IO.
  */
-static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
-{
-	struct drbd_tl_epoch *b, *tmp, **pn;
-	struct list_head *le, *tle, carry_reads;
-	struct drbd_request *req;
-	int rv, n_writes, n_reads;
-
-	b = mdev->oldest_tle;
-	pn = &mdev->oldest_tle;
-	while (b) {
-		n_writes = 0;
-		n_reads = 0;
-		INIT_LIST_HEAD(&carry_reads);
-		list_for_each_safe(le, tle, &b->requests) {
-			req = list_entry(le, struct drbd_request, tl_requests);
-			rv = _req_mod(req, what);
-
-			n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
-			n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
-		}
-		tmp = b->next;
-
-		if (n_writes) {
-			if (what == resend) {
-				b->n_writes = n_writes;
-				if (b->w.cb == NULL) {
-					b->w.cb = w_send_barrier;
-					inc_ap_pending(mdev);
-					drbd_set_flag(mdev, CREATE_BARRIER);
-				}
-
-				drbd_queue_work(&mdev->data.work, &b->w);
-			}
-			pn = &b->next;
-		} else {
-			if (n_reads)
-				list_add(&carry_reads, &b->requests);
-			/* there could still be requests on that ring list,
-			 * in case local io is still pending */
-			list_del(&b->requests);
-
-			/* dec_ap_pending corresponding to queue_barrier.
-			 * the newest barrier may not have been queued yet,
-			 * in which case w.cb is still NULL. */
-			if (b->w.cb != NULL)
-				dec_ap_pending(mdev);
-
-			if (b == mdev->newest_tle) {
-				/* recycle, but reinit! */
-				D_ASSERT(tmp == NULL);
-				INIT_LIST_HEAD(&b->requests);
-				list_splice(&carry_reads, &b->requests);
-				INIT_LIST_HEAD(&b->w.list);
-				b->w.cb = NULL;
-				b->br_number = net_random();
-				b->n_writes = 0;
-
-				*pn = b;
-				break;
-			}
-			*pn = tmp;
-			kfree(b);
-		}
-		b = tmp;
-		list_splice(&carry_reads, &b->requests);
-	}
-
-	/* Actions operating on the disk state, also want to work on
-	   requests that got barrier acked. */
+/* must hold tconn->req_lock */
+void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
+{
+	struct drbd_request *req, *r;
 
-	list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
-		req = list_entry(le, struct drbd_request, tl_requests);
+	list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests)
 		_req_mod(req, what);
-	}
 }
 
+void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
+{
+	spin_lock_irq(&tconn->req_lock);	/* _tl_restart() must run under req_lock */
+	_tl_restart(tconn, what);		/* _req_mod(req, what) on each transfer_log entry */
+	spin_unlock_irq(&tconn->req_lock);
+}
 
 /**
  * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
@@ -438,43 +298,9 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
  * by the requests on the transfer gets marked as our of sync. Called from the
  * receiver thread and the worker thread.
  */
-void tl_clear(struct drbd_conf *mdev)
+void tl_clear(struct drbd_tconn *tconn)
 {
-	spin_lock_irq(&mdev->req_lock);
-	_tl_clear(mdev);
-	spin_unlock_irq(&mdev->req_lock);
-}
-
-static void _tl_clear(struct drbd_conf *mdev)
-{
-	struct list_head *le, *tle;
-	struct drbd_request *r;
-
-	_tl_restart(mdev, connection_lost_while_pending);
-
-	/* we expect this list to be empty. */
-	D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
-
-	/* but just in case, clean it up anyways! */
-	list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) {
-		r = list_entry(le, struct drbd_request, tl_requests);
-		/* It would be nice to complete outside of spinlock.
-		 * But this is easier for now. */
-		_req_mod(r, connection_lost_while_pending);
-	}
-
-	/* ensure bit indicating barrier is required is clear */
-	drbd_clear_flag(mdev, CREATE_BARRIER);
-
-	memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
-
-}
-
-void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
-{
-	spin_lock_irq(&mdev->req_lock);
-	_tl_restart(mdev, what);
-	spin_unlock_irq(&mdev->req_lock);
+	tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);
 }
 
 /**
@@ -483,1392 +309,131 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
  */
 void tl_abort_disk_io(struct drbd_conf *mdev)
 {
-	struct drbd_tl_epoch *b;
-	struct list_head *le, *tle;
-	struct drbd_request *req;
+	struct drbd_tconn *tconn = mdev->tconn;
+	struct drbd_request *req, *r;
 
-	spin_lock_irq(&mdev->req_lock);
-	b = mdev->oldest_tle;
-	while (b) {
-		list_for_each_safe(le, tle, &b->requests) {
-			req = list_entry(le, struct drbd_request, tl_requests);
-			if (!(req->rq_state & RQ_LOCAL_PENDING))
-				continue;
-			_req_mod(req, abort_disk_io);
-		}
-		b = b->next;
-	}
-
-	list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
-		req = list_entry(le, struct drbd_request, tl_requests);
+	spin_lock_irq(&tconn->req_lock);
+	list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) {
 		if (!(req->rq_state & RQ_LOCAL_PENDING))
 			continue;
-		_req_mod(req, abort_disk_io);
-	}
-
-	spin_unlock_irq(&mdev->req_lock);
-}
-
-/**
- * cl_wide_st_chg() - true if the state change is a cluster wide one
- * @mdev:	DRBD device.
- * @os:		old (current) state.
- * @ns:		new (wanted) state.
- */
-static int cl_wide_st_chg(struct drbd_conf *mdev,
-			  union drbd_state os, union drbd_state ns)
-{
-	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
-		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
-		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
-		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
-		  (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
-		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
-		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
-}
-
-enum drbd_state_rv
-drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
-		  union drbd_state mask, union drbd_state val)
-{
-	unsigned long flags;
-	union drbd_state os, ns;
-	enum drbd_state_rv rv;
-
-	spin_lock_irqsave(&mdev->req_lock, flags);
-	os = mdev->state;
-	ns.i = (os.i & ~mask.i) | val.i;
-	rv = _drbd_set_state(mdev, ns, f, NULL);
-	ns = mdev->state;
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
-
-	return rv;
-}
-
-/**
- * drbd_force_state() - Impose a change which happens outside our control on our state
- * @mdev:	DRBD device.
- * @mask:	mask of state bits to change.
- * @val:	value of new state bits.
- */
-void drbd_force_state(struct drbd_conf *mdev,
-	union drbd_state mask, union drbd_state val)
-{
-	drbd_change_state(mdev, CS_HARD, mask, val);
-}
-
-static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
-static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
-						    union drbd_state,
-						    union drbd_state);
-enum sanitize_state_warnings {
-	NO_WARNING,
-	ABORTED_ONLINE_VERIFY,
-	ABORTED_RESYNC,
-	CONNECTION_LOST_NEGOTIATING,
-	IMPLICITLY_UPGRADED_DISK,
-	IMPLICITLY_UPGRADED_PDSK,
-};
-static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
-				       union drbd_state ns, enum sanitize_state_warnings *warn);
-int drbd_send_state_req(struct drbd_conf *,
-			union drbd_state, union drbd_state);
-
-static enum drbd_state_rv
-_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
-	     union drbd_state val)
-{
-	union drbd_state os, ns;
-	unsigned long flags;
-	enum drbd_state_rv rv;
-
-	if (drbd_test_and_clear_flag(mdev, CL_ST_CHG_SUCCESS))
-		return SS_CW_SUCCESS;
-
-	if (drbd_test_and_clear_flag(mdev, CL_ST_CHG_FAIL))
-		return SS_CW_FAILED_BY_PEER;
-
-	rv = 0;
-	spin_lock_irqsave(&mdev->req_lock, flags);
-	os = mdev->state;
-	ns.i = (os.i & ~mask.i) | val.i;
-	ns = sanitize_state(mdev, os, ns, NULL);
-
-	if (!cl_wide_st_chg(mdev, os, ns))
-		rv = SS_CW_NO_NEED;
-	if (!rv) {
-		rv = is_valid_state(mdev, ns);
-		if (rv == SS_SUCCESS) {
-			rv = is_valid_state_transition(mdev, ns, os);
-			if (rv == SS_SUCCESS)
-				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
-		}
-	}
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
-
-	return rv;
-}
-
-/**
- * drbd_req_state() - Perform an eventually cluster wide state change
- * @mdev:	DRBD device.
- * @mask:	mask of state bits to change.
- * @val:	value of new state bits.
- * @f:		flags
- *
- * Should not be called directly, use drbd_request_state() or
- * _drbd_request_state().
- */
-static enum drbd_state_rv
-drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
-	       union drbd_state val, enum chg_state_flags f)
-{
-	struct completion done;
-	unsigned long flags;
-	union drbd_state os, ns;
-	enum drbd_state_rv rv;
-
-	init_completion(&done);
-
-	if (f & CS_SERIALIZE)
-		mutex_lock(&mdev->state_mutex);
-
-	spin_lock_irqsave(&mdev->req_lock, flags);
-	os = mdev->state;
-	ns.i = (os.i & ~mask.i) | val.i;
-	ns = sanitize_state(mdev, os, ns, NULL);
-
-	if (cl_wide_st_chg(mdev, os, ns)) {
-		rv = is_valid_state(mdev, ns);
-		if (rv == SS_SUCCESS)
-			rv = is_valid_state_transition(mdev, ns, os);
-		spin_unlock_irqrestore(&mdev->req_lock, flags);
-
-		if (rv < SS_SUCCESS) {
-			if (f & CS_VERBOSE)
-				print_st_err(mdev, os, ns, rv);
-			goto abort;
-		}
-
-		drbd_state_lock(mdev);
-		if (!drbd_send_state_req(mdev, mask, val)) {
-			drbd_state_unlock(mdev);
-			rv = SS_CW_FAILED_BY_PEER;
-			if (f & CS_VERBOSE)
-				print_st_err(mdev, os, ns, rv);
-			goto abort;
-		}
-
-		if (mask.conn == C_MASK && val.conn == C_DISCONNECTING)
-			drbd_set_flag(mdev, DISCONNECT_SENT);
-
-		wait_event(mdev->state_wait,
-			(rv = _req_st_cond(mdev, mask, val)));
-
-		if (rv < SS_SUCCESS) {
-			drbd_state_unlock(mdev);
-			if (f & CS_VERBOSE)
-				print_st_err(mdev, os, ns, rv);
-			goto abort;
-		}
-		spin_lock_irqsave(&mdev->req_lock, flags);
-		os = mdev->state;
-		ns.i = (os.i & ~mask.i) | val.i;
-		rv = _drbd_set_state(mdev, ns, f, &done);
-		drbd_state_unlock(mdev);
-	} else {
-		rv = _drbd_set_state(mdev, ns, f, &done);
-	}
-
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
-
-	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
-		D_ASSERT(current != mdev->worker.task);
-		wait_for_completion(&done);
-	}
-
-abort:
-	if (f & CS_SERIALIZE)
-		mutex_unlock(&mdev->state_mutex);
-
-	return rv;
-}
-
-/**
- * _drbd_request_state() - Request a state change (with flags)
- * @mdev:	DRBD device.
- * @mask:	mask of state bits to change.
- * @val:	value of new state bits.
- * @f:		flags
- *
- * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
- * flag, or when logging of failed state change requests is not desired.
- */
-enum drbd_state_rv
-_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
-		    union drbd_state val, enum chg_state_flags f)
-{
-	enum drbd_state_rv rv;
-
-	wait_event(mdev->state_wait,
-		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
-
-	return rv;
-}
-
-static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
-{
-	dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n",
-	    name,
-	    drbd_conn_str(ns.conn),
-	    drbd_role_str(ns.role),
-	    drbd_role_str(ns.peer),
-	    drbd_disk_str(ns.disk),
-	    drbd_disk_str(ns.pdsk),
-	    is_susp(ns) ? 's' : 'r',
-	    ns.aftr_isp ? 'a' : '-',
-	    ns.peer_isp ? 'p' : '-',
-	    ns.user_isp ? 'u' : '-'
-	    );
-}
-
-void print_st_err(struct drbd_conf *mdev, union drbd_state os,
-	          union drbd_state ns, enum drbd_state_rv err)
-{
-	if (err == SS_IN_TRANSIENT_STATE)
-		return;
-	dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
-	print_st(mdev, " state", os);
-	print_st(mdev, "wanted", ns);
-}
-
-
-/**
- * is_valid_state() - Returns an SS_ error code if ns is not valid
- * @mdev:	DRBD device.
- * @ns:		State to consider.
- */
-static enum drbd_state_rv
-is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
-{
-	/* See drbd_state_sw_errors in drbd_strings.c */
-
-	enum drbd_fencing_p fp;
-	enum drbd_state_rv rv = SS_SUCCESS;
-
-	fp = FP_DONT_CARE;
-	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	}
-
-	if (get_net_conf(mdev)) {
-		if (!mdev->net_conf->two_primaries &&
-		    ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
-			rv = SS_TWO_PRIMARIES;
-		put_net_conf(mdev);
-	}
-
-	if (rv <= 0)
-		/* already found a reason to abort */;
-	else if (ns.role == R_SECONDARY && mdev->open_cnt)
-		rv = SS_DEVICE_IN_USE;
-
-	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
-		rv = SS_NO_UP_TO_DATE_DISK;
-
-	else if (fp >= FP_RESOURCE &&
-		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
-		rv = SS_PRIMARY_NOP;
-
-	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
-		rv = SS_NO_UP_TO_DATE_DISK;
-
-	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
-		rv = SS_NO_LOCAL_DISK;
-
-	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
-		rv = SS_NO_REMOTE_DISK;
-
-	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
-		rv = SS_NO_UP_TO_DATE_DISK;
-
-	else if ((ns.conn == C_CONNECTED ||
-		  ns.conn == C_WF_BITMAP_S ||
-		  ns.conn == C_SYNC_SOURCE ||
-		  ns.conn == C_PAUSED_SYNC_S) &&
-		  ns.disk == D_OUTDATED)
-		rv = SS_CONNECTED_OUTDATES;
-
-	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
-		 (mdev->sync_conf.verify_alg[0] == 0))
-		rv = SS_NO_VERIFY_ALG;
-
-	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
-		  mdev->agreed_pro_version < 88)
-		rv = SS_NOT_SUPPORTED;
-
-	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
-		rv = SS_CONNECTED_OUTDATES;
-
-	return rv;
-}
-
-/**
- * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
- * @mdev:	DRBD device.
- * @ns:		new state.
- * @os:		old state.
- */
-static enum drbd_state_rv
-is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
-			  union drbd_state os)
-{
-	enum drbd_state_rv rv = SS_SUCCESS;
-
-	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
-	    os.conn > C_CONNECTED)
-		rv = SS_RESYNC_RUNNING;
-
-	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
-		rv = SS_ALREADY_STANDALONE;
-
-	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
-		rv = SS_IS_DISKLESS;
-
-	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
-		rv = SS_NO_NET_CONFIG;
-
-	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
-		rv = SS_LOWER_THAN_OUTDATED;
-
-	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
-		rv = SS_IN_TRANSIENT_STATE;
-
-	if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
-		rv = SS_IN_TRANSIENT_STATE;
-
-	/* While establishing a connection only allow cstate to change.
-	   Delay/refuse role changes, detach attach etc... */
-	if (drbd_test_flag(mdev, STATE_SENT) &&
-	    !(os.conn == C_WF_REPORT_PARAMS ||
-	      (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION)))
-		rv = SS_IN_TRANSIENT_STATE;
-
-	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
-		rv = SS_NEED_CONNECTION;
-
-	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
-	    ns.conn != os.conn && os.conn > C_CONNECTED)
-		rv = SS_RESYNC_RUNNING;
-
-	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
-	    os.conn < C_CONNECTED)
-		rv = SS_NEED_CONNECTION;
-
-	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
-	    && os.conn < C_WF_REPORT_PARAMS)
-		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
-
-	return rv;
-}
-
-static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
-{
-	static const char *msg_table[] = {
-		[NO_WARNING] = "",
-		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
-		[ABORTED_RESYNC] = "Resync aborted.",
-		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
-		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
-		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
-	};
-
-	if (warn != NO_WARNING)
-		dev_warn(DEV, "%s\n", msg_table[warn]);
-}
-
-/**
- * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
- * @mdev:	DRBD device.
- * @os:		old state.
- * @ns:		new state.
- * @warn_sync_abort:
- *
- * When we loose connection, we have to set the state of the peers disk (pdsk)
- * to D_UNKNOWN. This rule and many more along those lines are in this function.
- */
-static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
-				       union drbd_state ns, enum sanitize_state_warnings *warn)
-{
-	enum drbd_fencing_p fp;
-	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
-
-	if (warn)
-		*warn = NO_WARNING;
-
-	fp = FP_DONT_CARE;
-	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	}
-
-	/* Disallow Network errors to configure a device's network part */
-	if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
-	    os.conn <= C_DISCONNECTING)
-		ns.conn = os.conn;
-
-	/* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
-	 * If you try to go into some Sync* state, that shall fail (elsewhere). */
-	if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
-	    ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED)
-		ns.conn = os.conn;
-
-	/* we cannot fail (again) if we already detached */
-	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
-		ns.disk = D_DISKLESS;
-
-	/* After C_DISCONNECTING only C_STANDALONE may follow */
-	if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
-		ns.conn = os.conn;
-
-	if (ns.conn < C_CONNECTED) {
-		ns.peer_isp = 0;
-		ns.peer = R_UNKNOWN;
-		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
-			ns.pdsk = D_UNKNOWN;
-	}
-
-	/* Clear the aftr_isp when becoming unconfigured */
-	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
-		ns.aftr_isp = 0;
-
-	/* Abort resync if a disk fails/detaches */
-	if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
-	    (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
-		if (warn)
-			*warn =	os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
-				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
-		ns.conn = C_CONNECTED;
-	}
-
-	/* Connection breaks down before we finished "Negotiating" */
-	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
-	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
-		if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
-			ns.disk = mdev->new_state_tmp.disk;
-			ns.pdsk = mdev->new_state_tmp.pdsk;
-		} else {
-			if (warn)
-				*warn = CONNECTION_LOST_NEGOTIATING;
-			ns.disk = D_DISKLESS;
-			ns.pdsk = D_UNKNOWN;
-		}
-		put_ldev(mdev);
-	}
-
-	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
-	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
-		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
-			ns.disk = D_UP_TO_DATE;
-		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
-			ns.pdsk = D_UP_TO_DATE;
-	}
-
-	/* Implications of the connection stat on the disk states */
-	disk_min = D_DISKLESS;
-	disk_max = D_UP_TO_DATE;
-	pdsk_min = D_INCONSISTENT;
-	pdsk_max = D_UNKNOWN;
-	switch ((enum drbd_conns)ns.conn) {
-	case C_WF_BITMAP_T:
-	case C_PAUSED_SYNC_T:
-	case C_STARTING_SYNC_T:
-	case C_WF_SYNC_UUID:
-	case C_BEHIND:
-		disk_min = D_INCONSISTENT;
-		disk_max = D_OUTDATED;
-		pdsk_min = D_UP_TO_DATE;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_VERIFY_S:
-	case C_VERIFY_T:
-		disk_min = D_UP_TO_DATE;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_UP_TO_DATE;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_CONNECTED:
-		disk_min = D_DISKLESS;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_DISKLESS;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_WF_BITMAP_S:
-	case C_PAUSED_SYNC_S:
-	case C_STARTING_SYNC_S:
-	case C_AHEAD:
-		disk_min = D_UP_TO_DATE;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_INCONSISTENT;
-		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
-		break;
-	case C_SYNC_TARGET:
-		disk_min = D_INCONSISTENT;
-		disk_max = D_INCONSISTENT;
-		pdsk_min = D_UP_TO_DATE;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_SYNC_SOURCE:
-		disk_min = D_UP_TO_DATE;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_INCONSISTENT;
-		pdsk_max = D_INCONSISTENT;
-		break;
-	case C_STANDALONE:
-	case C_DISCONNECTING:
-	case C_UNCONNECTED:
-	case C_TIMEOUT:
-	case C_BROKEN_PIPE:
-	case C_NETWORK_FAILURE:
-	case C_PROTOCOL_ERROR:
-	case C_TEAR_DOWN:
-	case C_WF_CONNECTION:
-	case C_WF_REPORT_PARAMS:
-	case C_MASK:
-		break;
-	}
-	if (ns.disk > disk_max)
-		ns.disk = disk_max;
-
-	if (ns.disk < disk_min) {
-		if (warn)
-			*warn = IMPLICITLY_UPGRADED_DISK;
-		ns.disk = disk_min;
-	}
-	if (ns.pdsk > pdsk_max)
-		ns.pdsk = pdsk_max;
-
-	if (ns.pdsk < pdsk_min) {
-		if (warn)
-			*warn = IMPLICITLY_UPGRADED_PDSK;
-		ns.pdsk = pdsk_min;
-	}
-
-	if (fp == FP_STONITH &&
-	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
-	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
-		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
-
-	if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
-	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
-	    !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
-		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
-
-	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
-		if (ns.conn == C_SYNC_SOURCE)
-			ns.conn = C_PAUSED_SYNC_S;
-		if (ns.conn == C_SYNC_TARGET)
-			ns.conn = C_PAUSED_SYNC_T;
-	} else {
-		if (ns.conn == C_PAUSED_SYNC_S)
-			ns.conn = C_SYNC_SOURCE;
-		if (ns.conn == C_PAUSED_SYNC_T)
-			ns.conn = C_SYNC_TARGET;
-	}
-
-	return ns;
-}
-
-/* helper for __drbd_set_state */
-static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
-{
-	if (mdev->agreed_pro_version < 90)
-		mdev->ov_start_sector = 0;
-	mdev->rs_total = drbd_bm_bits(mdev);
-	mdev->ov_position = 0;
-	if (cs == C_VERIFY_T) {
-		/* starting online verify from an arbitrary position
-		 * does not fit well into the existing protocol.
-		 * on C_VERIFY_T, we initialize ov_left and friends
-		 * implicitly in receive_DataRequest once the
-		 * first P_OV_REQUEST is received */
-		mdev->ov_start_sector = ~(sector_t)0;
-	} else {
-		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
-		if (bit >= mdev->rs_total) {
-			mdev->ov_start_sector =
-				BM_BIT_TO_SECT(mdev->rs_total - 1);
-			mdev->rs_total = 1;
-		} else
-			mdev->rs_total -= bit;
-		mdev->ov_position = mdev->ov_start_sector;
-	}
-	mdev->ov_left = mdev->rs_total;
-}
-
-static void drbd_resume_al(struct drbd_conf *mdev)
-{
-	if (drbd_test_and_clear_flag(mdev, AL_SUSPENDED))
-		dev_info(DEV, "Resumed AL updates\n");
-}
-
-/**
- * __drbd_set_state() - Set a new DRBD state
- * @mdev:	DRBD device.
- * @ns:		new state.
- * @flags:	Flags
- * @done:	Optional completion, that will get completed after the after_state_ch() finished
- *
- * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
- */
-enum drbd_state_rv
-__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
-	         enum chg_state_flags flags, struct completion *done)
-{
-	union drbd_state os;
-	enum drbd_state_r