aboutsummaryrefslogtreecommitdiff
path: root/drivers/block/drbd/drbd_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/drbd/drbd_main.c')
-rw-r--r--drivers/block/drbd/drbd_main.c3773
1 files changed, 1445 insertions, 2328 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9b833e0fb44..52de26daa1f 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -56,14 +56,6 @@
#include "drbd_vli.h"
-struct after_state_chg_work {
- struct drbd_work w;
- union drbd_state os;
- union drbd_state ns;
- enum chg_state_flags flags;
- struct completion *done;
-};
-
static DEFINE_MUTEX(drbd_main_mutex);
int drbdd_init(struct drbd_thread *);
int drbd_worker(struct drbd_thread *);
@@ -72,21 +64,17 @@ int drbd_asender(struct drbd_thread *);
int drbd_init(void);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static int drbd_release(struct gendisk *gd, fmode_t mode);
-static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
-static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
- union drbd_state ns, enum chg_state_flags flags);
-static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static int w_md_sync(struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
-static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
-static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
-static void _tl_clear(struct drbd_conf *mdev);
+static int w_bitmap_io(struct drbd_work *w, int unused);
+static int w_go_diskless(struct drbd_work *w, int unused);
MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
"Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
-MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
__stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
@@ -98,7 +86,6 @@ MODULE_PARM_DESC(allow_oos, "DONT USE!");
module_param(minor_count, uint, 0444);
module_param(disable_sendpage, bool, 0644);
module_param(allow_oos, bool, 0);
-module_param(cn_idx, uint, 0444);
module_param(proc_details, int, 0644);
#ifdef CONFIG_DRBD_FAULT_INJECTION
@@ -120,7 +107,6 @@ module_param(fault_devs, int, 0644);
unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
bool disable_sendpage;
bool allow_oos;
-unsigned int cn_idx = CN_IDX_DRBD;
int proc_details; /* Detail level in proc drbd*/
/* Module parameter for setting the user mode helper program
@@ -132,10 +118,11 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0
/* in 2.6.x, our device mapping and config info contains our virtual gendisks
* as member "struct gendisk *vdisk;"
*/
-struct drbd_conf **minor_table;
+struct idr minors;
+struct list_head drbd_tconns; /* list of struct drbd_tconn */
struct kmem_cache *drbd_request_cache;
-struct kmem_cache *drbd_ee_cache; /* epoch entries */
+struct kmem_cache *drbd_ee_cache; /* peer requests */
struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
mempool_t *drbd_request_mempool;
@@ -164,10 +151,15 @@ static const struct block_device_operations drbd_ops = {
struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
+ struct bio *bio;
+
if (!drbd_md_io_bio_set)
return bio_alloc(gfp_mask, 1);
- return bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
+ bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
+ if (!bio)
+ return NULL;
+ return bio;
}
#ifdef __CHECKER__
@@ -190,158 +182,87 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
#endif
/**
- * DOC: The transfer log
- *
- * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
- * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
- * of the list. There is always at least one &struct drbd_tl_epoch object.
- *
- * Each &struct drbd_tl_epoch has a circular double linked list of requests
- * attached.
- */
-static int tl_init(struct drbd_conf *mdev)
-{
- struct drbd_tl_epoch *b;
-
- /* during device minor initialization, we may well use GFP_KERNEL */
- b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
- if (!b)
- return 0;
- INIT_LIST_HEAD(&b->requests);
- INIT_LIST_HEAD(&b->w.list);
- b->next = NULL;
- b->br_number = 4711;
- b->n_writes = 0;
- b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
-
- mdev->oldest_tle = b;
- mdev->newest_tle = b;
- INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
- INIT_LIST_HEAD(&mdev->barrier_acked_requests);
-
- mdev->tl_hash = NULL;
- mdev->tl_hash_s = 0;
-
- return 1;
-}
-
-static void tl_cleanup(struct drbd_conf *mdev)
-{
- D_ASSERT(mdev->oldest_tle == mdev->newest_tle);
- D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
- kfree(mdev->oldest_tle);
- mdev->oldest_tle = NULL;
- kfree(mdev->unused_spare_tle);
- mdev->unused_spare_tle = NULL;
- kfree(mdev->tl_hash);
- mdev->tl_hash = NULL;
- mdev->tl_hash_s = 0;
-}
-
-/**
- * _tl_add_barrier() - Adds a barrier to the transfer log
- * @mdev: DRBD device.
- * @new: Barrier to be added before the current head of the TL.
- *
- * The caller must hold the req_lock.
- */
-void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
-{
- struct drbd_tl_epoch *newest_before;
-
- INIT_LIST_HEAD(&new->requests);
- INIT_LIST_HEAD(&new->w.list);
- new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
- new->next = NULL;
- new->n_writes = 0;
-
- newest_before = mdev->newest_tle;
- new->br_number = newest_before->br_number+1;
- if (mdev->newest_tle != new) {
- mdev->newest_tle->next = new;
- mdev->newest_tle = new;
- }
-}
-
-/**
- * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
- * @mdev: DRBD device.
+ * tl_release() - mark as BARRIER_ACKED all requests in the corresponding transfer log epoch
+ * @tconn: DRBD connection.
* @barrier_nr: Expected identifier of the DRBD write barrier packet.
* @set_size: Expected number of requests before that barrier.
*
* In case the passed barrier_nr or set_size does not match the oldest
- * &struct drbd_tl_epoch objects this function will cause a termination
- * of the connection.
+ * epoch of not yet barrier-acked requests, this function will cause a
+ * termination of the connection.
*/
-void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
- unsigned int set_size)
+void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
+ unsigned int set_size)
{
- struct drbd_tl_epoch *b, *nob; /* next old barrier */
- struct list_head *le, *tle;
struct drbd_request *r;
-
- spin_lock_irq(&mdev->req_lock);
-
- b = mdev->oldest_tle;
+ struct drbd_request *req = NULL;
+ int expect_epoch = 0;
+ int expect_size = 0;
+
+ spin_lock_irq(&tconn->req_lock);
+
+ /* find oldest not yet barrier-acked write request,
+ * count writes in its epoch. */
+ list_for_each_entry(r, &tconn->transfer_log, tl_requests) {
+ const unsigned s = r->rq_state;
+ if (!req) {
+ if (!(s & RQ_WRITE))
+ continue;
+ if (!(s & RQ_NET_MASK))
+ continue;
+ if (s & RQ_NET_DONE)
+ continue;
+ req = r;
+ expect_epoch = req->epoch;
+ expect_size ++;
+ } else {
+ if (r->epoch != expect_epoch)
+ break;
+ if (!(s & RQ_WRITE))
+ continue;
+ /* if (s & RQ_DONE): not expected */
+ /* if (!(s & RQ_NET_MASK)): not expected */
+ expect_size++;
+ }
+ }
/* first some paranoia code */
- if (b == NULL) {
- dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
- barrier_nr);
+ if (req == NULL) {
+ conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
+ barrier_nr);
goto bail;
}
- if (b->br_number != barrier_nr) {
- dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n",
- barrier_nr, b->br_number);
+ if (expect_epoch != barrier_nr) {
+ conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n",
+ barrier_nr, expect_epoch);
goto bail;
}
- if (b->n_writes != set_size) {
- dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
- barrier_nr, set_size, b->n_writes);
+
+ if (expect_size != set_size) {
+ conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
+ barrier_nr, set_size, expect_size);
goto bail;
}
- /* Clean up list of requests processed during current epoch */
- list_for_each_safe(le, tle, &b->requests) {
- r = list_entry(le, struct drbd_request, tl_requests);
- _req_mod(r, barrier_acked);
- }
- /* There could be requests on the list waiting for completion
- of the write to the local disk. To avoid corruptions of
- slab's data structures we have to remove the lists head.
-
- Also there could have been a barrier ack out of sequence, overtaking
- the write acks - which would be a bug and violating write ordering.
- To not deadlock in case we lose connection while such requests are
- still pending, we need some way to find them for the
- _req_mode(connection_lost_while_pending).
-
- These have been list_move'd to the out_of_sequence_requests list in
- _req_mod(, barrier_acked) above.
- */
- list_splice_init(&b->requests, &mdev->barrier_acked_requests);
-
- nob = b->next;
- if (drbd_test_and_clear_flag(mdev, CREATE_BARRIER)) {
- _tl_add_barrier(mdev, b);
- if (nob)
- mdev->oldest_tle = nob;
- /* if nob == NULL b was the only barrier, and becomes the new
- barrier. Therefore mdev->oldest_tle points already to b */
- } else {
- D_ASSERT(nob != NULL);
- mdev->oldest_tle = nob;
- kfree(b);
+ /* Clean up list of requests processed during current epoch. */
+ /* this extra list walk restart is paranoia,
+ * to catch requests being barrier-acked "unexpectedly".
+ * It usually should find the same req again, or some READ preceding it. */
+ list_for_each_entry(req, &tconn->transfer_log, tl_requests)
+ if (req->epoch == expect_epoch)
+ break;
+ list_for_each_entry_safe_from(req, r, &tconn->transfer_log, tl_requests) {
+ if (req->epoch != expect_epoch)
+ break;
+ _req_mod(req, BARRIER_ACKED);
}
-
- spin_unlock_irq(&mdev->req_lock);
- dec_ap_pending(mdev);
+ spin_unlock_irq(&tconn->req_lock);
return;
bail:
- spin_unlock_irq(&mdev->req_lock);
- drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+ spin_unlock_irq(&tconn->req_lock);
+ conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}
@@ -350,85 +271,24 @@ bail:
* @mdev: DRBD device.
* @what: The action/event to perform with all request objects
*
- * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
- * restart_frozen_disk_io.
+ * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
+ * RESTART_FROZEN_DISK_IO.
*/
-static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
-{
- struct drbd_tl_epoch *b, *tmp, **pn;
- struct list_head *le, *tle, carry_reads;
- struct drbd_request *req;
- int rv, n_writes, n_reads;
-
- b = mdev->oldest_tle;
- pn = &mdev->oldest_tle;
- while (b) {
- n_writes = 0;
- n_reads = 0;
- INIT_LIST_HEAD(&carry_reads);
- list_for_each_safe(le, tle, &b->requests) {
- req = list_entry(le, struct drbd_request, tl_requests);
- rv = _req_mod(req, what);
-
- n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
- n_reads += (rv & MR_READ) >> MR_READ_SHIFT;
- }
- tmp = b->next;
-
- if (n_writes) {
- if (what == resend) {
- b->n_writes = n_writes;
- if (b->w.cb == NULL) {
- b->w.cb = w_send_barrier;
- inc_ap_pending(mdev);
- drbd_set_flag(mdev, CREATE_BARRIER);
- }
-
- drbd_queue_work(&mdev->data.work, &b->w);
- }
- pn = &b->next;
- } else {
- if (n_reads)
- list_add(&carry_reads, &b->requests);
- /* there could still be requests on that ring list,
- * in case local io is still pending */
- list_del(&b->requests);
-
- /* dec_ap_pending corresponding to queue_barrier.
- * the newest barrier may not have been queued yet,
- * in which case w.cb is still NULL. */
- if (b->w.cb != NULL)
- dec_ap_pending(mdev);
-
- if (b == mdev->newest_tle) {
- /* recycle, but reinit! */
- D_ASSERT(tmp == NULL);
- INIT_LIST_HEAD(&b->requests);
- list_splice(&carry_reads, &b->requests);
- INIT_LIST_HEAD(&b->w.list);
- b->w.cb = NULL;
- b->br_number = net_random();
- b->n_writes = 0;
-
- *pn = b;
- break;
- }
- *pn = tmp;
- kfree(b);
- }
- b = tmp;
- list_splice(&carry_reads, &b->requests);
- }
-
- /* Actions operating on the disk state, also want to work on
- requests that got barrier acked. */
+/* must hold resource->req_lock */
+void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
+{
+ struct drbd_request *req, *r;
- list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
- req = list_entry(le, struct drbd_request, tl_requests);
+ list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests)
_req_mod(req, what);
- }
}
+void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
+{
+ spin_lock_irq(&tconn->req_lock);
+ _tl_restart(tconn, what);
+ spin_unlock_irq(&tconn->req_lock);
+}
/**
* tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
@@ -438,43 +298,9 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
* by the requests on the transfer gets marked as our of sync. Called from the
* receiver thread and the worker thread.
*/
-void tl_clear(struct drbd_conf *mdev)
+void tl_clear(struct drbd_tconn *tconn)
{
- spin_lock_irq(&mdev->req_lock);
- _tl_clear(mdev);
- spin_unlock_irq(&mdev->req_lock);
-}
-
-static void _tl_clear(struct drbd_conf *mdev)
-{
- struct list_head *le, *tle;
- struct drbd_request *r;
-
- _tl_restart(mdev, connection_lost_while_pending);
-
- /* we expect this list to be empty. */
- D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
-
- /* but just in case, clean it up anyways! */
- list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) {
- r = list_entry(le, struct drbd_request, tl_requests);
- /* It would be nice to complete outside of spinlock.
- * But this is easier for now. */
- _req_mod(r, connection_lost_while_pending);
- }
-
- /* ensure bit indicating barrier is required is clear */
- drbd_clear_flag(mdev, CREATE_BARRIER);
-
- memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
-
-}
-
-void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
-{
- spin_lock_irq(&mdev->req_lock);
- _tl_restart(mdev, what);
- spin_unlock_irq(&mdev->req_lock);
+ tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);
}
/**
@@ -483,1392 +309,131 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
*/
void tl_abort_disk_io(struct drbd_conf *mdev)
{
- struct drbd_tl_epoch *b;
- struct list_head *le, *tle;
- struct drbd_request *req;
+ struct drbd_tconn *tconn = mdev->tconn;
+ struct drbd_request *req, *r;
- spin_lock_irq(&mdev->req_lock);
- b = mdev->oldest_tle;
- while (b) {
- list_for_each_safe(le, tle, &b->requests) {
- req = list_entry(le, struct drbd_request, tl_requests);
- if (!(req->rq_state & RQ_LOCAL_PENDING))
- continue;
- _req_mod(req, abort_disk_io);
- }
- b = b->next;
- }
-
- list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
- req = list_entry(le, struct drbd_request, tl_requests);
+ spin_lock_irq(&tconn->req_lock);
+ list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) {
if (!(req->rq_state & RQ_LOCAL_PENDING))
continue;
- _req_mod(req, abort_disk_io);
- }
-
- spin_unlock_irq(&mdev->req_lock);
-}
-
-/**
- * cl_wide_st_chg() - true if the state change is a cluster wide one
- * @mdev: DRBD device.
- * @os: old (current) state.
- * @ns: new (wanted) state.
- */
-static int cl_wide_st_chg(struct drbd_conf *mdev,
- union drbd_state os, union drbd_state ns)
-{
- return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
- ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
- (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
- (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
- (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
- (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
- (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
-}
-
-enum drbd_state_rv
-drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
- union drbd_state mask, union drbd_state val)
-{
- unsigned long flags;
- union drbd_state os, ns;
- enum drbd_state_rv rv;
-
- spin_lock_irqsave(&mdev->req_lock, flags);
- os = mdev->state;
- ns.i = (os.i & ~mask.i) | val.i;
- rv = _drbd_set_state(mdev, ns, f, NULL);
- ns = mdev->state;
- spin_unlock_irqrestore(&mdev->req_lock, flags);
-
- return rv;
-}
-
-/**
- * drbd_force_state() - Impose a change which happens outside our control on our state
- * @mdev: DRBD device.
- * @mask: mask of state bits to change.
- * @val: value of new state bits.
- */
-void drbd_force_state(struct drbd_conf *mdev,
- union drbd_state mask, union drbd_state val)
-{
- drbd_change_state(mdev, CS_HARD, mask, val);
-}
-
-static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
-static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
- union drbd_state,
- union drbd_state);
-enum sanitize_state_warnings {
- NO_WARNING,
- ABORTED_ONLINE_VERIFY,
- ABORTED_RESYNC,
- CONNECTION_LOST_NEGOTIATING,
- IMPLICITLY_UPGRADED_DISK,
- IMPLICITLY_UPGRADED_PDSK,
-};
-static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
- union drbd_state ns, enum sanitize_state_warnings *warn);
-int drbd_send_state_req(struct drbd_conf *,
- union drbd_state, union drbd_state);
-
-static enum drbd_state_rv
-_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
- union drbd_state val)
-{
- union drbd_state os, ns;
- unsigned long flags;
- enum drbd_state_rv rv;
-
- if (drbd_test_and_clear_flag(mdev, CL_ST_CHG_SUCCESS))
- return SS_CW_SUCCESS;
-
- if (drbd_test_and_clear_flag(mdev, CL_ST_CHG_FAIL))
- return SS_CW_FAILED_BY_PEER;
-
- rv = 0;
- spin_lock_irqsave(&mdev->req_lock, flags);
- os = mdev->state;
- ns.i = (os.i & ~mask.i) | val.i;
- ns = sanitize_state(mdev, os, ns, NULL);
-
- if (!cl_wide_st_chg(mdev, os, ns))
- rv = SS_CW_NO_NEED;
- if (!rv) {
- rv = is_valid_state(mdev, ns);
- if (rv == SS_SUCCESS) {
- rv = is_valid_state_transition(mdev, ns, os);
- if (rv == SS_SUCCESS)
- rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
- }
- }
- spin_unlock_irqrestore(&mdev->req_lock, flags);
-
- return rv;
-}
-
-/**
- * drbd_req_state() - Perform an eventually cluster wide state change
- * @mdev: DRBD device.
- * @mask: mask of state bits to change.
- * @val: value of new state bits.
- * @f: flags
- *
- * Should not be called directly, use drbd_request_state() or
- * _drbd_request_state().
- */
-static enum drbd_state_rv
-drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
- union drbd_state val, enum chg_state_flags f)
-{
- struct completion done;
- unsigned long flags;
- union drbd_state os, ns;
- enum drbd_state_rv rv;
-
- init_completion(&done);
-
- if (f & CS_SERIALIZE)
- mutex_lock(&mdev->state_mutex);
-
- spin_lock_irqsave(&mdev->req_lock, flags);
- os = mdev->state;
- ns.i = (os.i & ~mask.i) | val.i;
- ns = sanitize_state(mdev, os, ns, NULL);
-
- if (cl_wide_st_chg(mdev, os, ns)) {
- rv = is_valid_state(mdev, ns);
- if (rv == SS_SUCCESS)
- rv = is_valid_state_transition(mdev, ns, os);
- spin_unlock_irqrestore(&mdev->req_lock, flags);
-
- if (rv < SS_SUCCESS) {
- if (f & CS_VERBOSE)
- print_st_err(mdev, os, ns, rv);
- goto abort;
- }
-
- drbd_state_lock(mdev);
- if (!drbd_send_state_req(mdev, mask, val)) {
- drbd_state_unlock(mdev);
- rv = SS_CW_FAILED_BY_PEER;
- if (f & CS_VERBOSE)
- print_st_err(mdev, os, ns, rv);
- goto abort;
- }
-
- if (mask.conn == C_MASK && val.conn == C_DISCONNECTING)
- drbd_set_flag(mdev, DISCONNECT_SENT);
-
- wait_event(mdev->state_wait,
- (rv = _req_st_cond(mdev, mask, val)));
-
- if (rv < SS_SUCCESS) {
- drbd_state_unlock(mdev);
- if (f & CS_VERBOSE)
- print_st_err(mdev, os, ns, rv);
- goto abort;
- }
- spin_lock_irqsave(&mdev->req_lock, flags);
- os = mdev->state;
- ns.i = (os.i & ~mask.i) | val.i;
- rv = _drbd_set_state(mdev, ns, f, &done);
- drbd_state_unlock(mdev);
- } else {
- rv = _drbd_set_state(mdev, ns, f, &done);
- }
-
- spin_unlock_irqrestore(&mdev->req_lock, flags);
-
- if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
- D_ASSERT(current != mdev->worker.task);
- wait_for_completion(&done);
- }
-
-abort:
- if (f & CS_SERIALIZE)
- mutex_unlock(&mdev->state_mutex);
-
- return rv;
-}
-
-/**
- * _drbd_request_state() - Request a state change (with flags)
- * @mdev: DRBD device.
- * @mask: mask of state bits to change.
- * @val: value of new state bits.
- * @f: flags
- *
- * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
- * flag, or when logging of failed state change requests is not desired.
- */
-enum drbd_state_rv
-_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
- union drbd_state val, enum chg_state_flags f)
-{
- enum drbd_state_rv rv;
-
- wait_event(mdev->state_wait,
- (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
-
- return rv;
-}
-
-static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
-{
- dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n",
- name,
- drbd_conn_str(ns.conn),
- drbd_role_str(ns.role),
- drbd_role_str(ns.peer),
- drbd_disk_str(ns.disk),
- drbd_disk_str(ns.pdsk),
- is_susp(ns) ? 's' : 'r',
- ns.aftr_isp ? 'a' : '-',
- ns.peer_isp ? 'p' : '-',
- ns.user_isp ? 'u' : '-'
- );
-}
-
-void print_st_err(struct drbd_conf *mdev, union drbd_state os,
- union drbd_state ns, enum drbd_state_rv err)
-{
- if (err == SS_IN_TRANSIENT_STATE)
- return;
- dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
- print_st(mdev, " state", os);
- print_st(mdev, "wanted", ns);
-}
-
-
-/**
- * is_valid_state() - Returns an SS_ error code if ns is not valid
- * @mdev: DRBD device.
- * @ns: State to consider.
- */
-static enum drbd_state_rv
-is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
-{
- /* See drbd_state_sw_errors in drbd_strings.c */
-
- enum drbd_fencing_p fp;
- enum drbd_state_rv rv = SS_SUCCESS;
-
- fp = FP_DONT_CARE;
- if (get_ldev(mdev)) {
- fp = mdev->ldev->dc.fencing;
- put_ldev(mdev);
- }
-
- if (get_net_conf(mdev)) {
- if (!mdev->net_conf->two_primaries &&
- ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
- rv = SS_TWO_PRIMARIES;
- put_net_conf(mdev);
- }
-
- if (rv <= 0)
- /* already found a reason to abort */;
- else if (ns.role == R_SECONDARY && mdev->open_cnt)
- rv = SS_DEVICE_IN_USE;
-
- else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
- rv = SS_NO_UP_TO_DATE_DISK;
-
- else if (fp >= FP_RESOURCE &&
- ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
- rv = SS_PRIMARY_NOP;
-
- else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
- rv = SS_NO_UP_TO_DATE_DISK;
-
- else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
- rv = SS_NO_LOCAL_DISK;
-
- else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
- rv = SS_NO_REMOTE_DISK;
-
- else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
- rv = SS_NO_UP_TO_DATE_DISK;
-
- else if ((ns.conn == C_CONNECTED ||
- ns.conn == C_WF_BITMAP_S ||
- ns.conn == C_SYNC_SOURCE ||
- ns.conn == C_PAUSED_SYNC_S) &&
- ns.disk == D_OUTDATED)
- rv = SS_CONNECTED_OUTDATES;
-
- else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
- (mdev->sync_conf.verify_alg[0] == 0))
- rv = SS_NO_VERIFY_ALG;
-
- else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
- mdev->agreed_pro_version < 88)
- rv = SS_NOT_SUPPORTED;
-
- else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
- rv = SS_CONNECTED_OUTDATES;
-
- return rv;
-}
-
-/**
- * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
- * @mdev: DRBD device.
- * @ns: new state.
- * @os: old state.
- */
-static enum drbd_state_rv
-is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
- union drbd_state os)
-{
- enum drbd_state_rv rv = SS_SUCCESS;
-
- if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
- os.conn > C_CONNECTED)
- rv = SS_RESYNC_RUNNING;
-
- if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
- rv = SS_ALREADY_STANDALONE;
-
- if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
- rv = SS_IS_DISKLESS;
-
- if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
- rv = SS_NO_NET_CONFIG;
-
- if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
- rv = SS_LOWER_THAN_OUTDATED;
-
- if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
- rv = SS_IN_TRANSIENT_STATE;
-
- if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
- rv = SS_IN_TRANSIENT_STATE;
-
- /* While establishing a connection only allow cstate to change.
- Delay/refuse role changes, detach attach etc... */
- if (drbd_test_flag(mdev, STATE_SENT) &&
- !(os.conn == C_WF_REPORT_PARAMS ||
- (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION)))
- rv = SS_IN_TRANSIENT_STATE;
-
- if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
- rv = SS_NEED_CONNECTION;
-
- if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
- ns.conn != os.conn && os.conn > C_CONNECTED)
- rv = SS_RESYNC_RUNNING;
-
- if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
- os.conn < C_CONNECTED)
- rv = SS_NEED_CONNECTION;
-
- if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
- && os.conn < C_WF_REPORT_PARAMS)
- rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
-
- return rv;
-}
-
-static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
-{
- static const char *msg_table[] = {
- [NO_WARNING] = "",
- [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
- [ABORTED_RESYNC] = "Resync aborted.",
- [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
- [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
- [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
- };
-
- if (warn != NO_WARNING)
- dev_warn(DEV, "%s\n", msg_table[warn]);
-}
-
-/**
- * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
- * @mdev: DRBD device.
- * @os: old state.
- * @ns: new state.
- * @warn_sync_abort:
- *
- * When we loose connection, we have to set the state of the peers disk (pdsk)
- * to D_UNKNOWN. This rule and many more along those lines are in this function.
- */
-static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
- union drbd_state ns, enum sanitize_state_warnings *warn)
-{
- enum drbd_fencing_p fp;
- enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
-
- if (warn)
- *warn = NO_WARNING;
-
- fp = FP_DONT_CARE;
- if (get_ldev(mdev)) {
- fp = mdev->ldev->dc.fencing;
- put_ldev(mdev);
- }
-
- /* Disallow Network errors to configure a device's network part */
- if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
- os.conn <= C_DISCONNECTING)
- ns.conn = os.conn;
-
- /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
- * If you try to go into some Sync* state, that shall fail (elsewhere). */
- if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
- ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED)
- ns.conn = os.conn;
-
- /* we cannot fail (again) if we already detached */
- if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
- ns.disk = D_DISKLESS;
-
- /* After C_DISCONNECTING only C_STANDALONE may follow */
- if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
- ns.conn = os.conn;
-
- if (ns.conn < C_CONNECTED) {
- ns.peer_isp = 0;
- ns.peer = R_UNKNOWN;
- if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
- ns.pdsk = D_UNKNOWN;
- }
-
- /* Clear the aftr_isp when becoming unconfigured */
- if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
- ns.aftr_isp = 0;
-
- /* Abort resync if a disk fails/detaches */
- if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
- (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
- if (warn)
- *warn = os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
- ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
- ns.conn = C_CONNECTED;
- }
-
- /* Connection breaks down before we finished "Negotiating" */
- if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
- get_ldev_if_state(mdev, D_NEGOTIATING)) {
- if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
- ns.disk = mdev->new_state_tmp.disk;
- ns.pdsk = mdev->new_state_tmp.pdsk;
- } else {
- if (warn)
- *warn = CONNECTION_LOST_NEGOTIATING;
- ns.disk = D_DISKLESS;
- ns.pdsk = D_UNKNOWN;
- }
- put_ldev(mdev);
- }
-
- /* D_CONSISTENT and D_OUTDATED vanish when we get connected */
- if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
- if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
- ns.disk = D_UP_TO_DATE;
- if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
- ns.pdsk = D_UP_TO_DATE;
- }
-
- /* Implications of the connection stat on the disk states */
- disk_min = D_DISKLESS;
- disk_max = D_UP_TO_DATE;
- pdsk_min = D_INCONSISTENT;
- pdsk_max = D_UNKNOWN;
- switch ((enum drbd_conns)ns.conn) {
- case C_WF_BITMAP_T:
- case C_PAUSED_SYNC_T:
- case C_STARTING_SYNC_T:
- case C_WF_SYNC_UUID:
- case C_BEHIND:
- disk_min = D_INCONSISTENT;
- disk_max = D_OUTDATED;
- pdsk_min = D_UP_TO_DATE;
- pdsk_max = D_UP_TO_DATE;
- break;
- case C_VERIFY_S:
- case C_VERIFY_T:
- disk_min = D_UP_TO_DATE;
- disk_max = D_UP_TO_DATE;
- pdsk_min = D_UP_TO_DATE;
- pdsk_max = D_UP_TO_DATE;
- break;
- case C_CONNECTED:
- disk_min = D_DISKLESS;
- disk_max = D_UP_TO_DATE;
- pdsk_min = D_DISKLESS;
- pdsk_max = D_UP_TO_DATE;
- break;
- case C_WF_BITMAP_S:
- case C_PAUSED_SYNC_S:
- case C_STARTING_SYNC_S:
- case C_AHEAD:
- disk_min = D_UP_TO_DATE;
- disk_max = D_UP_TO_DATE;
- pdsk_min = D_INCONSISTENT;
- pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
- break;
- case C_SYNC_TARGET:
- disk_min = D_INCONSISTENT;
- disk_max = D_INCONSISTENT;
- pdsk_min = D_UP_TO_DATE;
- pdsk_max = D_UP_TO_DATE;
- break;
- case C_SYNC_SOURCE:
- disk_min = D_UP_TO_DATE;
- disk_max = D_UP_TO_DATE;
- pdsk_min = D_INCONSISTENT;
- pdsk_max = D_INCONSISTENT;
- break;
- case C_STANDALONE:
- case C_DISCONNECTING:
- case C_UNCONNECTED:
- case C_TIMEOUT:
- case C_BROKEN_PIPE:
- case C_NETWORK_FAILURE:
- case C_PROTOCOL_ERROR:
- case C_TEAR_DOWN:
- case C_WF_CONNECTION:
- case C_WF_REPORT_PARAMS:
- case C_MASK:
- break;
- }
- if (ns.disk > disk_max)
- ns.disk = disk_max;
-
- if (ns.disk < disk_min) {
- if (warn)
- *warn = IMPLICITLY_UPGRADED_DISK;
- ns.disk = disk_min;
- }
- if (ns.pdsk > pdsk_max)
- ns.pdsk = pdsk_max;
-
- if (ns.pdsk < pdsk_min) {
- if (warn)
- *warn = IMPLICITLY_UPGRADED_PDSK;
- ns.pdsk = pdsk_min;
- }
-
- if (fp == FP_STONITH &&
- (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
- !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
- ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
-
- if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
- (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
- !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
- ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
-
- if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
- if (ns.conn == C_SYNC_SOURCE)
- ns.conn = C_PAUSED_SYNC_S;
- if (ns.conn == C_SYNC_TARGET)
- ns.conn = C_PAUSED_SYNC_T;
- } else {
- if (ns.conn == C_PAUSED_SYNC_S)
- ns.conn = C_SYNC_SOURCE;
- if (ns.conn == C_PAUSED_SYNC_T)
- ns.conn = C_SYNC_TARGET;
- }
-
- return ns;
-}
-
-/* helper for __drbd_set_state */
-static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
-{
- if (mdev->agreed_pro_version < 90)
- mdev->ov_start_sector = 0;
- mdev->rs_total = drbd_bm_bits(mdev);
- mdev->ov_position = 0;
- if (cs == C_VERIFY_T) {
- /* starting online verify from an arbitrary position
- * does not fit well into the existing protocol.
- * on C_VERIFY_T, we initialize ov_left and friends
- * implicitly in receive_DataRequest once the
- * first P_OV_REQUEST is received */
- mdev->ov_start_sector = ~(sector_t)0;
- } else {
- unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
- if (bit >= mdev->rs_total) {
- mdev->ov_start_sector =
- BM_BIT_TO_SECT(mdev->rs_total - 1);
- mdev->rs_total = 1;
- } else
- mdev->rs_total -= bit;
- mdev->ov_position = mdev->ov_start_sector;
- }
- mdev->ov_left = mdev->rs_total;
-}
-
-static void drbd_resume_al(struct drbd_conf *mdev)
-{
- if (drbd_test_and_clear_flag(mdev, AL_SUSPENDED))
- dev_info(DEV, "Resumed AL updates\n");
-}
-
-/**
- * __drbd_set_state() - Set a new DRBD state
- * @mdev: DRBD device.
- * @ns: new state.
- * @flags: Flags
- * @done: Optional completion, that will get completed after the after_state_ch() finished
- *
- * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
- */
-enum drbd_state_rv
-__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
- enum chg_state_flags flags, struct completion *done)
-{
- union drbd_state os;
- enum drbd_state_r