diff options
Diffstat (limited to 'drivers/block/drbd/drbd_main.c')
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 3773 |
1 files changed, 1445 insertions, 2328 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9b833e0fb44..52de26daa1f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -56,14 +56,6 @@ #include "drbd_vli.h" -struct after_state_chg_work { - struct drbd_work w; - union drbd_state os; - union drbd_state ns; - enum chg_state_flags flags; - struct completion *done; -}; - static DEFINE_MUTEX(drbd_main_mutex); int drbdd_init(struct drbd_thread *); int drbd_worker(struct drbd_thread *); @@ -72,21 +64,17 @@ int drbd_asender(struct drbd_thread *); int drbd_init(void); static int drbd_open(struct block_device *bdev, fmode_t mode); static int drbd_release(struct gendisk *gd, fmode_t mode); -static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); -static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum chg_state_flags flags); -static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static int w_md_sync(struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); -static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); -static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused); -static void _tl_clear(struct drbd_conf *mdev); +static int w_bitmap_io(struct drbd_work *w, int unused); +static int w_go_diskless(struct drbd_work *w, int unused); MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " "Lars Ellenberg <lars@linbit.com>"); MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); MODULE_VERSION(REL_VERSION); MODULE_LICENSE("GPL"); -MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (" +MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices (" __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")"); MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); @@ -98,7 +86,6 @@ MODULE_PARM_DESC(allow_oos, "DONT USE!"); module_param(minor_count, uint, 0444); module_param(disable_sendpage, bool, 0644); module_param(allow_oos, bool, 0); -module_param(cn_idx, uint, 0444); module_param(proc_details, int, 0644); #ifdef CONFIG_DRBD_FAULT_INJECTION @@ -120,7 +107,6 @@ module_param(fault_devs, int, 0644); unsigned int minor_count = DRBD_MINOR_COUNT_DEF; bool disable_sendpage; bool allow_oos; -unsigned int cn_idx = CN_IDX_DRBD; int proc_details; /* Detail level in proc drbd*/ /* Module parameter for setting the user mode helper program @@ -132,10 +118,11 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 /* in 2.6.x, our device mapping and config info contains our virtual gendisks * as member "struct gendisk *vdisk;" */ -struct drbd_conf **minor_table; +struct idr minors; +struct list_head drbd_tconns; /* list of struct drbd_tconn */ struct kmem_cache *drbd_request_cache; -struct kmem_cache *drbd_ee_cache; /* epoch entries */ +struct kmem_cache *drbd_ee_cache; /* peer requests */ struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t *drbd_request_mempool; @@ -164,10 +151,15 @@ static const struct block_device_operations drbd_ops = { struct bio *bio_alloc_drbd(gfp_t gfp_mask) { + struct bio *bio; + if (!drbd_md_io_bio_set) return bio_alloc(gfp_mask, 1); - return bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); + bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); + if (!bio) + return NULL; + return bio; } #ifdef __CHECKER__ @@ -190,158 +182,87 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) #endif /** - * DOC: The transfer log - * - * The transfer log is a single linked list of &struct drbd_tl_epoch objects. - * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail - * of the list. There is always at least one &struct drbd_tl_epoch object. - * - * Each &struct drbd_tl_epoch has a circular double linked list of requests - * attached. - */ -static int tl_init(struct drbd_conf *mdev) -{ - struct drbd_tl_epoch *b; - - /* during device minor initialization, we may well use GFP_KERNEL */ - b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL); - if (!b) - return 0; - INIT_LIST_HEAD(&b->requests); - INIT_LIST_HEAD(&b->w.list); - b->next = NULL; - b->br_number = 4711; - b->n_writes = 0; - b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ - - mdev->oldest_tle = b; - mdev->newest_tle = b; - INIT_LIST_HEAD(&mdev->out_of_sequence_requests); - INIT_LIST_HEAD(&mdev->barrier_acked_requests); - - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; - - return 1; -} - -static void tl_cleanup(struct drbd_conf *mdev) -{ - D_ASSERT(mdev->oldest_tle == mdev->newest_tle); - D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); - kfree(mdev->oldest_tle); - mdev->oldest_tle = NULL; - kfree(mdev->unused_spare_tle); - mdev->unused_spare_tle = NULL; - kfree(mdev->tl_hash); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; -} - -/** - * _tl_add_barrier() - Adds a barrier to the transfer log - * @mdev: DRBD device. - * @new: Barrier to be added before the current head of the TL. - * - * The caller must hold the req_lock. - */ -void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) -{ - struct drbd_tl_epoch *newest_before; - - INIT_LIST_HEAD(&new->requests); - INIT_LIST_HEAD(&new->w.list); - new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ - new->next = NULL; - new->n_writes = 0; - - newest_before = mdev->newest_tle; - new->br_number = newest_before->br_number+1; - if (mdev->newest_tle != new) { - mdev->newest_tle->next = new; - mdev->newest_tle = new; - } -} - -/** - * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL - * @mdev: DRBD device. + * tl_release() - mark as BARRIER_ACKED all requests in the corresponding transfer log epoch + * @tconn: DRBD connection. * @barrier_nr: Expected identifier of the DRBD write barrier packet. * @set_size: Expected number of requests before that barrier. * * In case the passed barrier_nr or set_size does not match the oldest - * &struct drbd_tl_epoch objects this function will cause a termination - * of the connection. + * epoch of not yet barrier-acked requests, this function will cause a + * termination of the connection. */ -void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, - unsigned int set_size) +void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, + unsigned int set_size) { - struct drbd_tl_epoch *b, *nob; /* next old barrier */ - struct list_head *le, *tle; struct drbd_request *r; - - spin_lock_irq(&mdev->req_lock); - - b = mdev->oldest_tle; + struct drbd_request *req = NULL; + int expect_epoch = 0; + int expect_size = 0; + + spin_lock_irq(&tconn->req_lock); + + /* find oldest not yet barrier-acked write request, + * count writes in its epoch. */ + list_for_each_entry(r, &tconn->transfer_log, tl_requests) { + const unsigned s = r->rq_state; + if (!req) { + if (!(s & RQ_WRITE)) + continue; + if (!(s & RQ_NET_MASK)) + continue; + if (s & RQ_NET_DONE) + continue; + req = r; + expect_epoch = req->epoch; + expect_size ++; + } else { + if (r->epoch != expect_epoch) + break; + if (!(s & RQ_WRITE)) + continue; + /* if (s & RQ_DONE): not expected */ + /* if (!(s & RQ_NET_MASK)): not expected */ + expect_size++; + } + } /* first some paranoia code */ - if (b == NULL) { - dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", - barrier_nr); + if (req == NULL) { + conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", + barrier_nr); goto bail; } - if (b->br_number != barrier_nr) { - dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n", - barrier_nr, b->br_number); + if (expect_epoch != barrier_nr) { + conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n", + barrier_nr, expect_epoch); goto bail; } - if (b->n_writes != set_size) { - dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n", - barrier_nr, set_size, b->n_writes); + + if (expect_size != set_size) { + conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n", + barrier_nr, set_size, expect_size); goto bail; } - /* Clean up list of requests processed during current epoch */ - list_for_each_safe(le, tle, &b->requests) { - r = list_entry(le, struct drbd_request, tl_requests); - _req_mod(r, barrier_acked); - } - /* There could be requests on the list waiting for completion - of the write to the local disk. To avoid corruptions of - slab's data structures we have to remove the lists head. - - Also there could have been a barrier ack out of sequence, overtaking - the write acks - which would be a bug and violating write ordering. - To not deadlock in case we lose connection while such requests are - still pending, we need some way to find them for the - _req_mode(connection_lost_while_pending). - - These have been list_move'd to the out_of_sequence_requests list in - _req_mod(, barrier_acked) above. - */ - list_splice_init(&b->requests, &mdev->barrier_acked_requests); - - nob = b->next; - if (drbd_test_and_clear_flag(mdev, CREATE_BARRIER)) { - _tl_add_barrier(mdev, b); - if (nob) - mdev->oldest_tle = nob; - /* if nob == NULL b was the only barrier, and becomes the new - barrier. Therefore mdev->oldest_tle points already to b */ - } else { - D_ASSERT(nob != NULL); - mdev->oldest_tle = nob; - kfree(b); + /* Clean up list of requests processed during current epoch. */ + /* this extra list walk restart is paranoia, + * to catch requests being barrier-acked "unexpectedly". + * It usually should find the same req again, or some READ preceding it. */ + list_for_each_entry(req, &tconn->transfer_log, tl_requests) + if (req->epoch == expect_epoch) + break; + list_for_each_entry_safe_from(req, r, &tconn->transfer_log, tl_requests) { + if (req->epoch != expect_epoch) + break; + _req_mod(req, BARRIER_ACKED); } - - spin_unlock_irq(&mdev->req_lock); - dec_ap_pending(mdev); + spin_unlock_irq(&tconn->req_lock); return; bail: - spin_unlock_irq(&mdev->req_lock); - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); + spin_unlock_irq(&tconn->req_lock); + conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); } @@ -350,85 +271,24 @@ bail: * @mdev: DRBD device. * @what: The action/event to perform with all request objects * - * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io, - * restart_frozen_disk_io. + * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, + * RESTART_FROZEN_DISK_IO. */ -static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) -{ - struct drbd_tl_epoch *b, *tmp, **pn; - struct list_head *le, *tle, carry_reads; - struct drbd_request *req; - int rv, n_writes, n_reads; - - b = mdev->oldest_tle; - pn = &mdev->oldest_tle; - while (b) { - n_writes = 0; - n_reads = 0; - INIT_LIST_HEAD(&carry_reads); - list_for_each_safe(le, tle, &b->requests) { - req = list_entry(le, struct drbd_request, tl_requests); - rv = _req_mod(req, what); - - n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT; - n_reads += (rv & MR_READ) >> MR_READ_SHIFT; - } - tmp = b->next; - - if (n_writes) { - if (what == resend) { - b->n_writes = n_writes; - if (b->w.cb == NULL) { - b->w.cb = w_send_barrier; - inc_ap_pending(mdev); - drbd_set_flag(mdev, CREATE_BARRIER); - } - - drbd_queue_work(&mdev->data.work, &b->w); - } - pn = &b->next; - } else { - if (n_reads) - list_add(&carry_reads, &b->requests); - /* there could still be requests on that ring list, - * in case local io is still pending */ - list_del(&b->requests); - - /* dec_ap_pending corresponding to queue_barrier. - * the newest barrier may not have been queued yet, - * in which case w.cb is still NULL. */ - if (b->w.cb != NULL) - dec_ap_pending(mdev); - - if (b == mdev->newest_tle) { - /* recycle, but reinit! */ - D_ASSERT(tmp == NULL); - INIT_LIST_HEAD(&b->requests); - list_splice(&carry_reads, &b->requests); - INIT_LIST_HEAD(&b->w.list); - b->w.cb = NULL; - b->br_number = net_random(); - b->n_writes = 0; - - *pn = b; - break; - } - *pn = tmp; - kfree(b); - } - b = tmp; - list_splice(&carry_reads, &b->requests); - } - - /* Actions operating on the disk state, also want to work on - requests that got barrier acked. */ +/* must hold resource->req_lock */ +void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) +{ + struct drbd_request *req, *r; - list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { - req = list_entry(le, struct drbd_request, tl_requests); + list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) _req_mod(req, what); - } } +void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) +{ + spin_lock_irq(&tconn->req_lock); + _tl_restart(tconn, what); + spin_unlock_irq(&tconn->req_lock); +} /** * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL @@ -438,43 +298,9 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) * by the requests on the transfer gets marked as our of sync. Called from the * receiver thread and the worker thread. */ -void tl_clear(struct drbd_conf *mdev) +void tl_clear(struct drbd_tconn *tconn) { - spin_lock_irq(&mdev->req_lock); - _tl_clear(mdev); - spin_unlock_irq(&mdev->req_lock); -} - -static void _tl_clear(struct drbd_conf *mdev) -{ - struct list_head *le, *tle; - struct drbd_request *r; - - _tl_restart(mdev, connection_lost_while_pending); - - /* we expect this list to be empty. */ - D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); - - /* but just in case, clean it up anyways! */ - list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) { - r = list_entry(le, struct drbd_request, tl_requests); - /* It would be nice to complete outside of spinlock. - * But this is easier for now. */ - _req_mod(r, connection_lost_while_pending); - } - - /* ensure bit indicating barrier is required is clear */ - drbd_clear_flag(mdev, CREATE_BARRIER); - - memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); - -} - -void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) -{ - spin_lock_irq(&mdev->req_lock); - _tl_restart(mdev, what); - spin_unlock_irq(&mdev->req_lock); + tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING); } /** @@ -483,1392 +309,131 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) */ void tl_abort_disk_io(struct drbd_conf *mdev) { - struct drbd_tl_epoch *b; - struct list_head *le, *tle; - struct drbd_request *req; + struct drbd_tconn *tconn = mdev->tconn; + struct drbd_request *req, *r; - spin_lock_irq(&mdev->req_lock); - b = mdev->oldest_tle; - while (b) { - list_for_each_safe(le, tle, &b->requests) { - req = list_entry(le, struct drbd_request, tl_requests); - if (!(req->rq_state & RQ_LOCAL_PENDING)) - continue; - _req_mod(req, abort_disk_io); - } - b = b->next; - } - - list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { - req = list_entry(le, struct drbd_request, tl_requests); + spin_lock_irq(&tconn->req_lock); + list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) { if (!(req->rq_state & RQ_LOCAL_PENDING)) continue; - _req_mod(req, abort_disk_io); - } - - spin_unlock_irq(&mdev->req_lock); -} - -/** - * cl_wide_st_chg() - true if the state change is a cluster wide one - * @mdev: DRBD device. - * @os: old (current) state. - * @ns: new (wanted) state. - */ -static int cl_wide_st_chg(struct drbd_conf *mdev, - union drbd_state os, union drbd_state ns) -{ - return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && - ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || - (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || - (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || - (os.disk != D_FAILED && ns.disk == D_FAILED))) || - (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || - (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); -} - -enum drbd_state_rv -drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, - union drbd_state mask, union drbd_state val) -{ - unsigned long flags; - union drbd_state os, ns; - enum drbd_state_rv rv; - - spin_lock_irqsave(&mdev->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - rv = _drbd_set_state(mdev, ns, f, NULL); - ns = mdev->state; - spin_unlock_irqrestore(&mdev->req_lock, flags); - - return rv; -} - -/** - * drbd_force_state() - Impose a change which happens outside our control on our state - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. - */ -void drbd_force_state(struct drbd_conf *mdev, - union drbd_state mask, union drbd_state val) -{ - drbd_change_state(mdev, CS_HARD, mask, val); -} - -static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); -static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, - union drbd_state, - union drbd_state); -enum sanitize_state_warnings { - NO_WARNING, - ABORTED_ONLINE_VERIFY, - ABORTED_RESYNC, - CONNECTION_LOST_NEGOTIATING, - IMPLICITLY_UPGRADED_DISK, - IMPLICITLY_UPGRADED_PDSK, -}; -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum sanitize_state_warnings *warn); -int drbd_send_state_req(struct drbd_conf *, - union drbd_state, union drbd_state); - -static enum drbd_state_rv -_req_st_cond(struct drbd_conf *mdev, union drbd_state mask, - union drbd_state val) -{ - union drbd_state os, ns; - unsigned long flags; - enum drbd_state_rv rv; - - if (drbd_test_and_clear_flag(mdev, CL_ST_CHG_SUCCESS)) - return SS_CW_SUCCESS; - - if (drbd_test_and_clear_flag(mdev, CL_ST_CHG_FAIL)) - return SS_CW_FAILED_BY_PEER; - - rv = 0; - spin_lock_irqsave(&mdev->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); - - if (!cl_wide_st_chg(mdev, os, ns)) - rv = SS_CW_NO_NEED; - if (!rv) { - rv = is_valid_state(mdev, ns); - if (rv == SS_SUCCESS) { - rv = is_valid_state_transition(mdev, ns, os); - if (rv == SS_SUCCESS) - rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ - } - } - spin_unlock_irqrestore(&mdev->req_lock, flags); - - return rv; -} - -/** - * drbd_req_state() - Perform an eventually cluster wide state change - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. - * @f: flags - * - * Should not be called directly, use drbd_request_state() or - * _drbd_request_state(). - */ -static enum drbd_state_rv -drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, - union drbd_state val, enum chg_state_flags f) -{ - struct completion done; - unsigned long flags; - union drbd_state os, ns; - enum drbd_state_rv rv; - - init_completion(&done); - - if (f & CS_SERIALIZE) - mutex_lock(&mdev->state_mutex); - - spin_lock_irqsave(&mdev->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); - - if (cl_wide_st_chg(mdev, os, ns)) { - rv = is_valid_state(mdev, ns); - if (rv == SS_SUCCESS) - rv = is_valid_state_transition(mdev, ns, os); - spin_unlock_irqrestore(&mdev->req_lock, flags); - - if (rv < SS_SUCCESS) { - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; - } - - drbd_state_lock(mdev); - if (!drbd_send_state_req(mdev, mask, val)) { - drbd_state_unlock(mdev); - rv = SS_CW_FAILED_BY_PEER; - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; - } - - if (mask.conn == C_MASK && val.conn == C_DISCONNECTING) - drbd_set_flag(mdev, DISCONNECT_SENT); - - wait_event(mdev->state_wait, - (rv = _req_st_cond(mdev, mask, val))); - - if (rv < SS_SUCCESS) { - drbd_state_unlock(mdev); - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; - } - spin_lock_irqsave(&mdev->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - rv = _drbd_set_state(mdev, ns, f, &done); - drbd_state_unlock(mdev); - } else { - rv = _drbd_set_state(mdev, ns, f, &done); - } - - spin_unlock_irqrestore(&mdev->req_lock, flags); - - if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { - D_ASSERT(current != mdev->worker.task); - wait_for_completion(&done); - } - -abort: - if (f & CS_SERIALIZE) - mutex_unlock(&mdev->state_mutex); - - return rv; -} - -/** - * _drbd_request_state() - Request a state change (with flags) - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. - * @f: flags - * - * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE - * flag, or when logging of failed state change requests is not desired. - */ -enum drbd_state_rv -_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, - union drbd_state val, enum chg_state_flags f) -{ - enum drbd_state_rv rv; - - wait_event(mdev->state_wait, - (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); - - return rv; -} - -static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) -{ - dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", - name, - drbd_conn_str(ns.conn), - drbd_role_str(ns.role), - drbd_role_str(ns.peer), - drbd_disk_str(ns.disk), - drbd_disk_str(ns.pdsk), - is_susp(ns) ? 's' : 'r', - ns.aftr_isp ? 'a' : '-', - ns.peer_isp ? 'p' : '-', - ns.user_isp ? 'u' : '-' - ); -} - -void print_st_err(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum drbd_state_rv err) -{ - if (err == SS_IN_TRANSIENT_STATE) - return; - dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); - print_st(mdev, " state", os); - print_st(mdev, "wanted", ns); -} - - -/** - * is_valid_state() - Returns an SS_ error code if ns is not valid - * @mdev: DRBD device. - * @ns: State to consider. - */ -static enum drbd_state_rv -is_valid_state(struct drbd_conf *mdev, union drbd_state ns) -{ - /* See drbd_state_sw_errors in drbd_strings.c */ - - enum drbd_fencing_p fp; - enum drbd_state_rv rv = SS_SUCCESS; - - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } - - if (get_net_conf(mdev)) { - if (!mdev->net_conf->two_primaries && - ns.role == R_PRIMARY && ns.peer == R_PRIMARY) - rv = SS_TWO_PRIMARIES; - put_net_conf(mdev); - } - - if (rv <= 0) - /* already found a reason to abort */; - else if (ns.role == R_SECONDARY && mdev->open_cnt) - rv = SS_DEVICE_IN_USE; - - else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) - rv = SS_NO_UP_TO_DATE_DISK; - - else if (fp >= FP_RESOURCE && - ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN) - rv = SS_PRIMARY_NOP; - - else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) - rv = SS_NO_UP_TO_DATE_DISK; - - else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT) - rv = SS_NO_LOCAL_DISK; - - else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) - rv = SS_NO_REMOTE_DISK; - - else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) - rv = SS_NO_UP_TO_DATE_DISK; - - else if ((ns.conn == C_CONNECTED || - ns.conn == C_WF_BITMAP_S || - ns.conn == C_SYNC_SOURCE || - ns.conn == C_PAUSED_SYNC_S) && - ns.disk == D_OUTDATED) - rv = SS_CONNECTED_OUTDATES; - - else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - (mdev->sync_conf.verify_alg[0] == 0)) - rv = SS_NO_VERIFY_ALG; - - else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - mdev->agreed_pro_version < 88) - rv = SS_NOT_SUPPORTED; - - else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) - rv = SS_CONNECTED_OUTDATES; - - return rv; -} - -/** - * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible - * @mdev: DRBD device. - * @ns: new state. - * @os: old state. - */ -static enum drbd_state_rv -is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, - union drbd_state os) -{ - enum drbd_state_rv rv = SS_SUCCESS; - - if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && - os.conn > C_CONNECTED) - rv = SS_RESYNC_RUNNING; - - if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE) - rv = SS_ALREADY_STANDALONE; - - if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS) - rv = SS_IS_DISKLESS; - - if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED) - rv = SS_NO_NET_CONFIG; - - if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING) - rv = SS_LOWER_THAN_OUTDATED; - - if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) - rv = SS_IN_TRANSIENT_STATE; - - if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) - rv = SS_IN_TRANSIENT_STATE; - - /* While establishing a connection only allow cstate to change. - Delay/refuse role changes, detach attach etc... */ - if (drbd_test_flag(mdev, STATE_SENT) && - !(os.conn == C_WF_REPORT_PARAMS || - (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION))) - rv = SS_IN_TRANSIENT_STATE; - - if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) - rv = SS_NEED_CONNECTION; - - if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - ns.conn != os.conn && os.conn > C_CONNECTED) - rv = SS_RESYNC_RUNNING; - - if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && - os.conn < C_CONNECTED) - rv = SS_NEED_CONNECTION; - - if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE) - && os.conn < C_WF_REPORT_PARAMS) - rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ - - return rv; -} - -static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn) -{ - static const char *msg_table[] = { - [NO_WARNING] = "", - [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.", - [ABORTED_RESYNC] = "Resync aborted.", - [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!", - [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk", - [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk", - }; - - if (warn != NO_WARNING) - dev_warn(DEV, "%s\n", msg_table[warn]); -} - -/** - * sanitize_state() - Resolves implicitly necessary additional changes to a state transition - * @mdev: DRBD device. - * @os: old state. - * @ns: new state. - * @warn_sync_abort: - * - * When we loose connection, we have to set the state of the peers disk (pdsk) - * to D_UNKNOWN. This rule and many more along those lines are in this function. - */ -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum sanitize_state_warnings *warn) -{ - enum drbd_fencing_p fp; - enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; - - if (warn) - *warn = NO_WARNING; - - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } - - /* Disallow Network errors to configure a device's network part */ - if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && - os.conn <= C_DISCONNECTING) - ns.conn = os.conn; - - /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. - * If you try to go into some Sync* state, that shall fail (elsewhere). */ - if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && - ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED) - ns.conn = os.conn; - - /* we cannot fail (again) if we already detached */ - if (ns.disk == D_FAILED && os.disk == D_DISKLESS) - ns.disk = D_DISKLESS; - - /* After C_DISCONNECTING only C_STANDALONE may follow */ - if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) - ns.conn = os.conn; - - if (ns.conn < C_CONNECTED) { - ns.peer_isp = 0; - ns.peer = R_UNKNOWN; - if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT) - ns.pdsk = D_UNKNOWN; - } - - /* Clear the aftr_isp when becoming unconfigured */ - if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) - ns.aftr_isp = 0; - - /* Abort resync if a disk fails/detaches */ - if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && - (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { - if (warn) - *warn = os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? - ABORTED_ONLINE_VERIFY : ABORTED_RESYNC; - ns.conn = C_CONNECTED; - } - - /* Connection breaks down before we finished "Negotiating" */ - if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && - get_ldev_if_state(mdev, D_NEGOTIATING)) { - if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { - ns.disk = mdev->new_state_tmp.disk; - ns.pdsk = mdev->new_state_tmp.pdsk; - } else { - if (warn) - *warn = CONNECTION_LOST_NEGOTIATING; - ns.disk = D_DISKLESS; - ns.pdsk = D_UNKNOWN; - } - put_ldev(mdev); - } - - /* D_CONSISTENT and D_OUTDATED vanish when we get connected */ - if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) { - if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) - ns.disk = D_UP_TO_DATE; - if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED) - ns.pdsk = D_UP_TO_DATE; - } - - /* Implications of the connection stat on the disk states */ - disk_min = D_DISKLESS; - disk_max = D_UP_TO_DATE; - pdsk_min = D_INCONSISTENT; - pdsk_max = D_UNKNOWN; - switch ((enum drbd_conns)ns.conn) { - case C_WF_BITMAP_T: - case C_PAUSED_SYNC_T: - case C_STARTING_SYNC_T: - case C_WF_SYNC_UUID: - case C_BEHIND: - disk_min = D_INCONSISTENT; - disk_max = D_OUTDATED; - pdsk_min = D_UP_TO_DATE; - pdsk_max = D_UP_TO_DATE; - break; - case C_VERIFY_S: - case C_VERIFY_T: - disk_min = D_UP_TO_DATE; - disk_max = D_UP_TO_DATE; - pdsk_min = D_UP_TO_DATE; - pdsk_max = D_UP_TO_DATE; - break; - case C_CONNECTED: - disk_min = D_DISKLESS; - disk_max = D_UP_TO_DATE; - pdsk_min = D_DISKLESS; - pdsk_max = D_UP_TO_DATE; - break; - case C_WF_BITMAP_S: - case C_PAUSED_SYNC_S: - case C_STARTING_SYNC_S: - case C_AHEAD: - disk_min = D_UP_TO_DATE; - disk_max = D_UP_TO_DATE; - pdsk_min = D_INCONSISTENT; - pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/ - break; - case C_SYNC_TARGET: - disk_min = D_INCONSISTENT; - disk_max = D_INCONSISTENT; - pdsk_min = D_UP_TO_DATE; - pdsk_max = D_UP_TO_DATE; - break; - case C_SYNC_SOURCE: - disk_min = D_UP_TO_DATE; - disk_max = D_UP_TO_DATE; - pdsk_min = D_INCONSISTENT; - pdsk_max = D_INCONSISTENT; - break; - case C_STANDALONE: - case C_DISCONNECTING: - case C_UNCONNECTED: - case C_TIMEOUT: - case C_BROKEN_PIPE: - case C_NETWORK_FAILURE: - case C_PROTOCOL_ERROR: - case C_TEAR_DOWN: - case C_WF_CONNECTION: - case C_WF_REPORT_PARAMS: - case C_MASK: - break; - } - if (ns.disk > disk_max) - ns.disk = disk_max; - - if (ns.disk < disk_min) { - if (warn) - *warn = IMPLICITLY_UPGRADED_DISK; - ns.disk = disk_min; - } - if (ns.pdsk > pdsk_max) - ns.pdsk = pdsk_max; - - if (ns.pdsk < pdsk_min) { - if (warn) - *warn = IMPLICITLY_UPGRADED_PDSK; - ns.pdsk = pdsk_min; - } - - if (fp == FP_STONITH && - (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && - !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) - ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ - - if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && - (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) && - !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE)) - ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ - - if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { - if (ns.conn == C_SYNC_SOURCE) - ns.conn = C_PAUSED_SYNC_S; - if (ns.conn == C_SYNC_TARGET) - ns.conn = C_PAUSED_SYNC_T; - } else { - if (ns.conn == C_PAUSED_SYNC_S) - ns.conn = C_SYNC_SOURCE; - if (ns.conn == C_PAUSED_SYNC_T) - ns.conn = C_SYNC_TARGET; - } - - return ns; -} - -/* helper for __drbd_set_state */ -static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) -{ - if (mdev->agreed_pro_version < 90) - mdev->ov_start_sector = 0; - mdev->rs_total = drbd_bm_bits(mdev); - mdev->ov_position = 0; - if (cs == C_VERIFY_T) { - /* starting online verify from an arbitrary position - * does not fit well into the existing protocol. - * on C_VERIFY_T, we initialize ov_left and friends - * implicitly in receive_DataRequest once the - * first P_OV_REQUEST is received */ - mdev->ov_start_sector = ~(sector_t)0; - } else { - unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); - if (bit >= mdev->rs_total) { - mdev->ov_start_sector = - BM_BIT_TO_SECT(mdev->rs_total - 1); - mdev->rs_total = 1; - } else - mdev->rs_total -= bit; - mdev->ov_position = mdev->ov_start_sector; - } - mdev->ov_left = mdev->rs_total; -} - -static void drbd_resume_al(struct drbd_conf *mdev) -{ - if (drbd_test_and_clear_flag(mdev, AL_SUSPENDED)) - dev_info(DEV, "Resumed AL updates\n"); -} - -/** - * __drbd_set_state() - Set a new DRBD state - * @mdev: DRBD device. - * @ns: new state. - * @flags: Flags - * @done: Optional completion, that will get completed after the after_state_ch() finished - * - * Caller needs to hold req_lock, and global_state_lock. Do not call directly. - */ -enum drbd_state_rv -__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, - enum chg_state_flags flags, struct completion *done) -{ - union drbd_state os; - enum drbd_state_r |