aboutsummaryrefslogtreecommitdiff
path: root/drivers/block/drbd/drbd_nl.c
diff options
context:
space:
mode:
author Philipp Reisner <philipp.reisner@linbit.com> 2012-11-09 14:18:43 +0100
committer Philipp Reisner <philipp.reisner@linbit.com> 2012-11-09 14:20:23 +0100
commit 986836503e49ccf7e84b813715d344964ec93566 (patch)
tree b3bea7428efde5b77096cef80e5b6bfee494cc12 /drivers/block/drbd/drbd_nl.c
parent ccae7868b0c5697508a541c531cf96b361d62c1c (diff)
parent 328e0f125bf41f4f33f684db22015f92cb44fe56 (diff)
Merge branch 'drbd-8.4_ed6' into for-3.8-drivers-drbd-8.4_ed6
Diffstat (limited to 'drivers/block/drbd/drbd_nl.c')
-rw-r--r-- drivers/block/drbd/drbd_nl.c 3318
1 files changed, 1969 insertions, 1349 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index c8dda4e8dfc..76bb3a684b8 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -29,159 +29,317 @@
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
-#include <linux/connector.h>
#include <linux/blkpg.h>
#include <linux/cpumask.h>
#include "drbd_int.h"
#include "drbd_req.h"
#include "drbd_wrappers.h"
#include <asm/unaligned.h>
-#include <linux/drbd_tag_magic.h>
#include <linux/drbd_limits.h>
-#include <linux/compiler.h>
#include <linux/kthread.h>
-static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
-static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
-static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *);
-
-/* see get_sb_bdev and bd_claim */
+#include <net/genetlink.h>
+
+/* .doit */
+// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
+// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
+/* .dumpit */
+int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
+
+#include <linux/drbd_genl_api.h>
+#include "drbd_nla.h"
+#include <linux/genl_magic_func.h>
+
+/* used blkdev_get_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
-/* Generate the tag_list to struct functions */
-#define NL_PACKET(name, number, fields) \
-static int name ## _from_tags(struct drbd_conf *mdev, \
- unsigned short *tags, struct name *arg) __attribute__ ((unused)); \
-static int name ## _from_tags(struct drbd_conf *mdev, \
- unsigned short *tags, struct name *arg) \
-{ \
- int tag; \
- int dlen; \
- \
- while ((tag = get_unaligned(tags++)) != TT_END) { \
- dlen = get_unaligned(tags++); \
- switch (tag_number(tag)) { \
- fields \
- default: \
- if (tag & T_MANDATORY) { \
- dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \
- return 0; \
- } \
- } \
- tags = (unsigned short *)((char *)tags + dlen); \
- } \
- return 1; \
-}
-#define NL_INTEGER(pn, pr, member) \
- case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \
- arg->member = get_unaligned((int *)(tags)); \
- break;
-#define NL_INT64(pn, pr, member) \
- case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \
- arg->member = get_unaligned((u64 *)(tags)); \
+/* Configuration is strictly serialized, because generic netlink message
+ * processing is strictly serialized by the genl_lock().
+ * Which means we can use one static global drbd_config_context struct.
+ *
+ * drbd_adm_prepare() zeroes this struct and refills it for every request;
+ * drbd_adm_finish() drops the tconn reference (if one is set) and sends
+ * the reply skb.
+ */
+static struct drbd_config_context {
+ /* assigned from drbd_genlmsghdr */
+ unsigned int minor;
+ /* assigned from request attributes, if present */
+ unsigned int volume;
+#define VOLUME_UNSPECIFIED (-1U)
+ /* pointer into the request skb,
+ * limited lifetime! */
+ char *resource_name;
+ struct nlattr *my_addr;
+ struct nlattr *peer_addr;
+
+ /* reply buffer */
+ struct sk_buff *reply_skb;
+ /* pointer into reply buffer */
+ struct drbd_genlmsghdr *reply_dh;
+ /* resolved from attributes, if possible */
+ struct drbd_conf *mdev;
+ struct drbd_tconn *tconn;
+} adm_ctx;
+
+/* Finalize the generic netlink message in @skb and send it as the reply to
+ * the request described by @info.
+ * A send failure is only logged; it is not reported back to the caller. */
+static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
+{
+ genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
+ if (genlmsg_reply(skb, info))
+ printk(KERN_ERR "drbd: error sending genl reply\n");
+}
+
+/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
+ * reason it could fail was no space in skb, and there are 4k available.
+ *
+ * Appends @info as a T_info_text string, nested inside a DRBD_NLA_CFG_REPLY
+ * attribute, to adm_ctx.reply_skb.
+ *
+ * Returns 0 on success, and also when @info is NULL or empty (nothing is
+ * added in that case).  Returns -EMSGSIZE if the nest could not be started,
+ * or the error from nla_put_string(); on that error the partially built
+ * nest is cancelled again. */
+int drbd_msg_put_info(const char *info)
+{
+ struct sk_buff *skb = adm_ctx.reply_skb;
+ struct nlattr *nla;
+ int err = -EMSGSIZE;
+
+ if (!info || !info[0])
+ return 0;
+
+ nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
+ if (!nla)
+ return err;
+
+ err = nla_put_string(skb, T_info_text, info);
+ if (err) {
+ nla_nest_cancel(skb, nla);
+ return err;
+ } else
+ nla_nest_end(skb, nla);
+ return 0;
+}
+
+/* This would be a good candidate for a "pre_doit" hook,
+ * and per-family private info->pointers.
+ * But we need to stay compatible with older kernels.
+ * If it returns successfully, adm_ctx members are valid.
+ *
+ * @flags is a bit mask of the DRBD_ADM_NEED_* values below and selects
+ * which objects (minor, resource, connection) the request must resolve to.
+ *
+ * Return value conventions, as relied on by the callers:
+ *  - negative errno, with adm_ctx.reply_skb == NULL: no reply can be sent
+ *    (-EPERM before any allocation; otherwise via the "fail" label, which
+ *    frees the reply skb).
+ *  - an ERR_* code, with adm_ctx.reply_skb still set: the request was
+ *    rejected, an info text was added to the reply, and the caller is
+ *    expected to pass the code on to drbd_adm_finish().
+ *  - NO_ERROR: adm_ctx is filled in.
+ *
+ * NOTE(review): adm_ctx.tconn may already be set when an ERR_* code is
+ * returned; presumably conn_get_by_name() took a reference, which
+ * drbd_adm_finish() drops -- confirm against conn_get_by_name().
+ */
+#define DRBD_ADM_NEED_MINOR 1
+#define DRBD_ADM_NEED_RESOURCE 2
+#define DRBD_ADM_NEED_CONNECTION 4
+static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
+ unsigned flags)
+{
+ struct drbd_genlmsghdr *d_in = info->userhdr;
+ const u8 cmd = info->genlhdr->cmd;
+ int err;
+
+ memset(&adm_ctx, 0, sizeof(adm_ctx));
+
+ /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
+ if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!adm_ctx.reply_skb) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
+ info, &drbd_genl_family, 0, cmd);
+ /* put of a few bytes into a fresh skb of >= 4k will always succeed.
+ * but anyways */
+ if (!adm_ctx.reply_dh) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ adm_ctx.reply_dh->minor = d_in->minor;
+ adm_ctx.reply_dh->ret_code = NO_ERROR;
+
+ adm_ctx.volume = VOLUME_UNSPECIFIED;
+ if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
+ struct nlattr *nla;
+ /* parse and validate only */
+ err = drbd_cfg_context_from_attrs(NULL, info);
+ if (err)
+ goto fail;
+
+ /* It was present, and valid,
+ * copy it over to the reply skb. */
+ err = nla_put_nohdr(adm_ctx.reply_skb,
+ info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
+ info->attrs[DRBD_NLA_CFG_CONTEXT]);
+ if (err)
+ goto fail;
+
+ /* and assign stuff to the global adm_ctx */
+ /* NOTE(review): nested_attr_tb is not declared in this function;
+ * presumably it was filled by drbd_cfg_context_from_attrs()
+ * above -- confirm. */
+ nla = nested_attr_tb[__nla_type(T_ctx_volume)];
+ if (nla)
+ adm_ctx.volume = nla_get_u32(nla);
+ nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
+ if (nla)
+ adm_ctx.resource_name = nla_data(nla);
+ adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
+ adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
+ /* reject addresses that would not fit into the tconn address fields;
+ * tconn is only used inside sizeof() here, it is not dereferenced */
+ if ((adm_ctx.my_addr &&
+ nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.tconn->my_addr)) ||
+ (adm_ctx.peer_addr &&
+ nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.tconn->peer_addr))) {
+ err = -EINVAL;
+ goto fail;
+ }
+ }
+
+ adm_ctx.minor = d_in->minor;
+ adm_ctx.mdev = minor_to_mdev(d_in->minor);
+ adm_ctx.tconn = conn_get_by_name(adm_ctx.resource_name);
+
+ if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) {
+ drbd_msg_put_info("unknown minor");
+ return ERR_MINOR_INVALID;
+ }
+ if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_RESOURCE)) {
+ drbd_msg_put_info("unknown resource");
+ return ERR_INVALID_REQUEST;
+ }
+
+ if (flags & DRBD_ADM_NEED_CONNECTION) {
+ /* a connection is identified by its address pair only, so a
+ * resolved resource name or minor is treated as an error here */
+ if (adm_ctx.tconn && !(flags & DRBD_ADM_NEED_RESOURCE)) {
+ drbd_msg_put_info("no resource name expected");
+ return ERR_INVALID_REQUEST;
+ }
+ if (adm_ctx.mdev) {
+ drbd_msg_put_info("no minor number expected");
+ return ERR_INVALID_REQUEST;
+ }
+ if (adm_ctx.my_addr && adm_ctx.peer_addr)
+ adm_ctx.tconn = conn_get_by_addrs(nla_data(adm_ctx.my_addr),
+ nla_len(adm_ctx.my_addr),
+ nla_data(adm_ctx.peer_addr),
+ nla_len(adm_ctx.peer_addr));
+ if (!adm_ctx.tconn) {
+ drbd_msg_put_info("unknown connection");
+ return ERR_INVALID_REQUEST;
+ }
+ }
+
+ /* some more paranoia, if the request was over-determined */
+ if (adm_ctx.mdev && adm_ctx.tconn &&
+ adm_ctx.mdev->tconn != adm_ctx.tconn) {
+ pr_warning("request: minor=%u, resource=%s; but that minor belongs to connection %s\n",
+ adm_ctx.minor, adm_ctx.resource_name,
+ adm_ctx.mdev->tconn->name);
+ drbd_msg_put_info("minor exists in different resource");
+ return ERR_INVALID_REQUEST;
+ }
+ if (adm_ctx.mdev &&
+ adm_ctx.volume != VOLUME_UNSPECIFIED &&
+ adm_ctx.volume != adm_ctx.mdev->vnr) {
+ pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
+ adm_ctx.minor, adm_ctx.volume,
+ adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name);
+ drbd_msg_put_info("minor exists as different volume");
+ return ERR_INVALID_REQUEST;
+ }
+
+ return NO_ERROR;
+
+fail:
+ /* no reply will be sent: release the reply skb and signal that to the
+ * caller by leaving adm_ctx.reply_skb NULL */
+ nlmsg_free(adm_ctx.reply_skb);
+ adm_ctx.reply_skb = NULL;
+ return err;
+}
+
+/* Counterpart to drbd_adm_prepare(): drop the reference on adm_ctx.tconn
+ * (if one is set), store @retcode in the reply header and send the reply.
+ *
+ * Returns -ENOMEM if there is no reply skb (nothing is sent in that case),
+ * 0 otherwise.  A failure to send the reply is not reflected in the return
+ * value. */
+static int drbd_adm_finish(struct genl_info *info, int retcode)
+{
+ if (adm_ctx.tconn) {
+ kref_put(&adm_ctx.tconn->kref, &conn_destroy);
+ adm_ctx.tconn = NULL;
+ }
+
+ if (!adm_ctx.reply_skb)
+ return -ENOMEM;
+
+ adm_ctx.reply_dh->ret_code = retcode;
+ drbd_adm_send_reply(adm_ctx.reply_skb, info);
+ return 0;
+}
+
+static void setup_khelper_env(struct drbd_tconn *tconn, char **envp)
+{
+ char *afs;
+
+ /* FIXME: A future version will not allow this case. */
+ if (tconn->my_addr_len == 0 || tconn->peer_addr_len == 0)
+ return;
+
+ switch (((struct sockaddr *)&tconn->peer_addr)->sa_family) {
+ case AF_INET6:
+ afs = "ipv6";
+ snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
+ &((struct sockaddr_in6 *)&tconn->peer_addr)->sin6_addr);
break;
-#define NL_BIT(pn, pr, member) \
- case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \
- arg->member = *(char *)(tags) ? 1 : 0; \
+ case AF_INET:
+ afs = "ipv4";
+ snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
+ &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr);
break;
-#define NL_STRING(pn, pr, member, len) \
- case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \
- if (dlen > len) { \
- dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \
- #member, dlen, (unsigned int)len); \
- return 0; \
- } \
- arg->member ## _len = dlen; \
- memcpy(arg->member, tags, min_t(size_t, dlen, len)); \
- break;
-#include <linux/drbd_nl.h>
-
-/* Generate the struct to tag_list functions */
-#define NL_PACKET(name, number, fields) \
-static unsigned short* \
-name ## _to_tags(struct drbd_conf *mdev, \
- struct name *arg, unsigned short *tags) __attribute__ ((unused)); \
-static unsigned short* \
-name ## _to_tags(struct drbd_conf *mdev, \
- struct name *arg, unsigned short *tags) \
-{ \
- fields \
- return tags; \
-}
-
-#define NL_INTEGER(pn, pr, member) \
- put_unaligned(pn | pr | TT_INTEGER, tags++); \
- put_unaligned(sizeof(int), tags++); \
- put_unaligned(arg->member, (int *)tags); \
- tags = (unsigned short *)((char *)tags+sizeof(int));
-#define NL_INT64(pn, pr, member) \
- put_unaligned(pn | pr | TT_INT64, tags++); \
- put_unaligned(sizeof(u64), tags++); \
- put_unaligned(arg->member, (u64 *)tags); \
- tags = (unsigned short *)((char *)tags+sizeof(u64));
-#define NL_BIT(pn, pr, member) \
- put_unaligned(pn | pr | TT_BIT, tags++); \
- put_unaligned(sizeof(char), tags++); \
- *(char *)tags = arg->member; \
- tags = (unsigned short *)((char *)tags+sizeof(char));
-#define NL_STRING(pn, pr, member, len) \
- put_unaligned(pn | pr | TT_STRING, tags++); \
- put_unaligned(arg->member ## _len, tags++); \
- memcpy(tags, arg->member, arg->member ## _len); \
- tags = (unsigned short *)((char *)tags + arg->member ## _len);
-#include <linux/drbd_nl.h>
-
-void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name);
-void drbd_nl_send_reply(struct cn_msg *, int);
+ default:
+ afs = "ssocks";
+ snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
+ &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr);
+ }
+ snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
+}
int drbd_khelper(struct drbd_conf *mdev, char *cmd)
{
char *envp[] = { "HOME=/",
"TERM=linux",
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
- NULL, /* Will be set to address family */
- NULL, /* Will be set to address */
+ (char[20]) { }, /* address family */
+ (char[60]) { }, /* address */
NULL };
-
- char mb[12], af[20], ad[60], *afs;
+ char mb[12];
char *argv[] = {usermode_helper, cmd, mb, NULL };
+ struct drbd_tconn *tconn = mdev->tconn;
+ struct sib_info sib;
int ret;
- if (current == mdev->worker.task)
- drbd_set_flag(mdev, CALLBACK_PENDING);
+ if (current == tconn->worker.task)
+ set_bit(CALLBACK_PENDING, &tconn->flags);
snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
-
- if (get_net_conf(mdev)) {
- switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) {
- case AF_INET6:
- afs = "ipv6";
- snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6",
- &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr);
- break;
- case AF_INET:
- afs = "ipv4";
- snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
- &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
- break;
- default:
- afs = "ssocks";
- snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
- &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
- }
- snprintf(af, 20, "DRBD_PEER_AF=%s", afs);
- envp[3]=af;
- envp[4]=ad;
- put_net_conf(mdev);
- }
+ setup_khelper_env(tconn, envp);
/* The helper may take some time.
* write out any unsynced meta data changes now */
drbd_md_sync(mdev);
dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
-
- drbd_bcast_ev_helper(mdev, cmd);
+ sib.sib_reason = SIB_HELPER_PRE;
+ sib.helper_name = cmd;
+ drbd_bcast_event(mdev, &sib);
ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
@@ -191,9 +349,46 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
usermode_helper, cmd, mb,
(ret >> 8) & 0xff, ret);
+ sib.sib_reason = SIB_HELPER_POST;
+ sib.helper_exit_code = ret;
+ drbd_bcast_event(mdev, &sib);
+
+ if (current == tconn->worker.task)
+ clear_bit(CALLBACK_PENDING, &tconn->flags);
+
+ if (ret < 0) /* Ignore any ERRNOs we got. */
+ ret = 0;
+
+ return ret;
+}
+
+int conn_khelper(struct drbd_tconn *tconn, char *cmd)
+{
+ char *envp[] = { "HOME=/",
+ "TERM=linux",
+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+ (char[20]) { }, /* address family */
+ (char[60]) { }, /* address */
+ NULL };
+ char *argv[] = {usermode_helper, cmd, tconn->name, NULL };
+ int ret;
+
+ setup_khelper_env(tconn, envp);
+ conn_md_sync(tconn);
- if (current == mdev->worker.task)
- drbd_clear_flag(mdev, CALLBACK_PENDING);
+ conn_info(tconn, "helper command: %s %s %s\n", usermode_helper, cmd, tconn->name);
+ /* TODO: conn_bcast_event() ?? */
+
+ ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
+ if (ret)
+ conn_warn(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
+ usermode_helper, cmd, tconn->name,
+ (ret >> 8) & 0xff, ret);
+ else
+ conn_info(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
+ usermode_helper, cmd, tconn->name,
+ (ret >> 8) & 0xff, ret);
+ /* TODO: conn_bcast_event() ?? */
if (ret < 0) /* Ignore any ERRNOs we got. */
ret = 0;
@@ -201,116 +396,129 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
return ret;
}
-enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
+/* Walk all volumes of @tconn under rcu_read_lock() and return the maximum
+ * disk_conf->fencing value among those volumes for which
+ * get_ldev_if_state(mdev, D_CONSISTENT) succeeds.
+ * Returns FP_NOT_AVAIL if it succeeds for no volume.
+ * NOTE(review): presumably get_ldev_if_state() succeeds only if the local
+ * disk state is at least D_CONSISTENT -- confirm. */
+static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
{
+ enum drbd_fencing_p fp = FP_NOT_AVAIL;
+ struct drbd_conf *mdev;
+ int vnr;
+
+ rcu_read_lock();
+ idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+ if (get_ldev_if_state(mdev, D_CONSISTENT)) {
+ fp = max_t(enum drbd_fencing_p, fp,
+ rcu_dereference(mdev->ldev->disk_conf)->fencing);
+ put_ldev(mdev);
+ }
+ }
+ rcu_read_unlock();
+
+ return fp;
+}
+
+bool conn_try_outdate_peer(struct drbd_tconn *tconn)
+{
+ union drbd_state mask = { };
+ union drbd_state val = { };
+ enum drbd_fencing_p fp;
char *ex_to_string;
int r;
- enum drbd_disk_state nps;
- enum drbd_fencing_p fp;
- D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
+ if (tconn->cstate >= C_WF_REPORT_PARAMS) {
+ conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n");
+ return false;
+ }
- if (get_ldev_if_state(mdev, D_CONSISTENT)) {
- fp = mdev->ldev->dc.fencing;
- put_ldev(mdev);
- } else {
- dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
- nps = mdev->state.pdsk;
+ fp = highest_fencing_policy(tconn);
+ switch (fp) {
+ case FP_NOT_AVAIL:
+ conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n");
goto out;
+ case FP_DONT_CARE:
+ return true;
+ default: ;
}
- r = drbd_khelper(mdev, "fence-peer");
+ r = conn_khelper(tconn, "fence-peer");
switch ((r>>8) & 0xff) {
case 3: /* peer is inconsistent */
ex_to_string = "peer is inconsistent or worse";
- nps = D_INCONSISTENT;
+ mask.pdsk = D_MASK;
+ val.pdsk = D_INCONSISTENT;
break;
case 4: /* peer got outdated, or was already outdated */
ex_to_string = "peer was fenced";
- nps = D_OUTDATED;
+ mask.pdsk = D_MASK;
+ val.pdsk = D_OUTDATED;
break;
case 5: /* peer was down */
- if (mdev->state.disk == D_UP_TO_DATE) {
+ if (conn_highest_disk(tconn) == D_UP_TO_DATE) {
/* we will(have) create(d) a new UUID anyways... */
ex_to_string = "peer is unreachable, assumed to be dead";
- nps = D_OUTDATED;
+ mask.pdsk = D_MASK;
+ val.pdsk = D_OUTDATED;
} else {
ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
- nps = mdev->state.pdsk;
}
break;
case 6: /* Peer is primary, voluntarily outdate myself.
* This is useful when an unconnected R_SECONDARY is asked to
* become R_PRIMARY, but finds the other peer being active. */
ex_to_string = "peer is active";
- dev_warn(DEV, "Peer is primary, outdating myself.\n");
- nps = D_UNKNOWN;
- _drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE);
+ conn_warn(tconn, "Peer is primary, outdating myself.\n");
+ mask.disk = D_MASK;
+ val.disk = D_OUTDATED;
break;
case 7:
if (fp != FP_STONITH)
- dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n");
+ conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n");
ex_to_string = "peer was stonithed";
- nps = D_OUTDATED;
+ mask.pdsk = D_MASK;
+ val.pdsk = D_OUTDATED;
break;
default:
/* The script is broken ... */
- nps = D_UNKNOWN;
- dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
- return nps;
+ conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
+ return false; /* Eventually leave IO frozen */
}
- dev_info(DEV, "fence-peer helper returned %d (%s)\n",
- (r>>8) & 0xff, ex_to_string);
+ conn_info(tconn, "fence-peer helper returned %d (%s)\n",
+ (r>>8) & 0xff, ex_to_string);
-out:
- if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
- /* The handler was not successful... unfreeze here, the
- state engine can not unfreeze... */
- _drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
- }
+ out:
- return nps;
+ /* Not using
+ conn_request_state(tconn, mask, val, CS_VERBOSE);
+ here, because we might have been able to re-establish the connection in the
+ meantime. */
+ spin_lock_irq(&tconn->req_lock);
+ if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags))
+ _conn_request_state(tconn, mask, val, CS_VERBOSE);
+ spin_unlock_irq(&tconn->req_lock);
+
+ return conn_highest_pdsk(tconn) <= D_OUTDATED;
}
static int _try_outdate_peer_async(void *data)
{
- struct drbd_conf *mdev = (struct drbd_conf *)data;
- enum drbd_disk_state nps;
- union drbd_state ns;
+ struct drbd_tconn *tconn = (struct drbd_tconn *)data;
- nps = drbd_try_outdate_peer(mdev);
-
- /* Not using
- drbd_request_state(mdev, NS(pdsk, nps));
- here, because we might were able to re-establish the connection
- in the meantime. This can only partially be solved in the state's
- engine is_valid_state() and is_valid_state_transition()
- functions.
-
- nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
- pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
- therefore we have to have the pre state change check here.
- */
- spin_lock_irq(&mdev->req_lock);
- ns = mdev->state;
- if (ns.conn < C_WF_REPORT_PARAMS && !drbd_test_flag(mdev, STATE_SENT)) {
- ns.pdsk = nps;
- _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
- }
- spin_unlock_irq(&mdev->req_lock);
+ conn_try_outdate_peer(tconn);
+ kref_put(&tconn->kref, &conn_destroy);
return 0;
}
-void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
+void conn_try_outdate_peer_async(struct drbd_tconn *tconn)
{
struct task_struct *opa;
- opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev));
- if (IS_ERR(opa))
- dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
+ kref_get(&tconn->kref);
+ opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h");
+ if (IS_ERR(opa)) {
+ conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n");
+ kref_put(&tconn->kref, &conn_destroy);
+ }
}
enum drbd_state_rv
@@ -318,15 +526,15 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
{
const int max_tries = 4;
enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
+ struct net_conf *nc;
int try = 0;
int forced = 0;
union drbd_state mask, val;
- enum drbd_disk_state nps;
if (new_role == R_PRIMARY)
- request_ping(mdev); /* Detect a dead peer ASAP */
+ request_ping(mdev->tconn); /* Detect a dead peer ASAP */
- mutex_lock(&mdev->state_mutex);
+ mutex_lock(mdev->state_mutex);
mask.i = 0; mask.role = R_MASK;
val.i = 0; val.role = new_role;
@@ -354,38 +562,34 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
if (rv == SS_NO_UP_TO_DATE_DISK &&
mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
- nps = drbd_try_outdate_peer(mdev);
- if (nps == D_OUTDATED || nps == D_INCONSISTENT) {
+ if (conn_try_outdate_peer(mdev->tconn)) {
val.disk = D_UP_TO_DATE;
mask.disk = D_MASK;
}
-
- val.pdsk = nps;
- mask.pdsk = D_MASK;
-
continue;
}
if (rv == SS_NOTHING_TO_DO)
- goto fail;
+ goto out;
if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
- nps = drbd_try_outdate_peer(mdev);
-
- if (force && nps > D_OUTDATED) {
+ if (!conn_try_outdate_peer(mdev->tconn) && force) {
dev_warn(DEV, "Forced into split brain situation!\n");
- nps = D_OUTDATED;
- }
-
- mask.pdsk = D_MASK;
- val.pdsk = nps;
+ mask.pdsk = D_MASK;
+ val.pdsk = D_OUTDATED;
+ }
continue;
}
if (rv == SS_TWO_PRIMARIES) {
/* Maybe the peer is detected as dead very soon...
retry at most once more in this case. */
- schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10);
+ int timeo;
+ rcu_read_lock();
+ nc = rcu_dereference(mdev->tconn->net_conf);
+ timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
+ rcu_read_unlock();
+ schedule_timeout_interruptible(timeo);
if (try < max_tries)
try = max_tries - 1;
continue;
@@ -394,13 +598,13 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
rv = _drbd_request_state(mdev, mask, val,
CS_VERBOSE + CS_WAIT_COMPLETE);
if (rv < SS_SUCCESS)
- goto fail;
+ goto out;
}
break;
}
if (rv < SS_SUCCESS)
- goto fail;
+ goto out;
if (forced)
dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
@@ -408,6 +612,8 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
/* Wait until nothing is on the fly :) */
wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
+ /* FIXME also wait for all pending P_BARRIER_ACK? */
+
if (new_role == R_SECONDARY) {
set_disk_ro(mdev->vdisk, true);
if (get_ldev(mdev)) {
@@ -415,10 +621,12 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
put_ldev(mdev);
}
} else {
- if (get_net_conf(mdev)) {
- mdev->net_conf->want_lose = 0;
- put_net_conf(mdev);
- }
+ mutex_lock(&mdev->tconn->conf_update);
+ nc = mdev->tconn->net_conf;
+ if (nc)
+ nc->discard_my_data = 0; /* without copy; single bit op is atomic */
+ mutex_unlock(&mdev->tconn->conf_update);
+
set_disk_ro(mdev->vdisk, false);
if (get_ldev(mdev)) {
if (((mdev->state.conn < C_CONNECTED ||
@@ -444,67 +652,47 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
drbd_md_sync(mdev);
kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
- fail:
- mutex_unlock(&mdev->state_mutex);
+out:
+ mutex_unlock(mdev->state_mutex);
return rv;
}
-static struct drbd_conf *ensure_mdev(int minor, int create)
+static const char *from_attrs_err_to_txt(int err)
{
- struct drbd_conf *mdev;
-
- if (minor >= minor_count)
- return NULL;
-
- mdev = minor_to_mdev(minor);
-
- if (!mdev && create) {
- struct gendisk *disk = NULL;
- mdev = drbd_new_device(minor);
-
- spin_lock_irq(&drbd_pp_lock);
- if (minor_table[minor] == NULL) {
- minor_table[minor] = mdev;
- disk = mdev->vdisk;
- mdev = NULL;
- } /* else: we lost the race */
- spin_unlock_irq(&drbd_pp_lock);
-
- if (disk) /* we won the race above */
- /* in case we ever add a drbd_delete_device(),
- * don't forget the del_gendisk! */
- add_disk(disk);
- else /* we lost the race above */
- drbd_free_mdev(mdev);
-
- mdev = minor_to_mdev(minor);
- }
-
- return mdev;
+ return err == -ENOMSG ? "required attribute missing" :
+ err == -EOPNOTSUPP ? "unknown mandatory attribute" :
+ err == -EEXIST ? "can not change invariant setting" :
+ "invalid attribute value";
}
-static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
- struct drbd_nl_cfg_reply *reply)
+int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
{
- struct primary primary_args;
-
- memset(&primary_args, 0, sizeof(struct primary));
- if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) {
- reply->ret_code = ERR_MANDATORY_TAG;
- return 0;
- }
-
- reply->ret_code =
- drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);
+ struct set_role_parms parms;
+ int err;
+ enum drbd_ret_code retcode;
- return 0;
-}
+ retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ if (!adm_ctx.reply_skb)
+ return retcode;
+ if (retcode != NO_ERROR)
+ goto out;
-static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
- struct drbd_nl_cfg_reply *reply)
-{
- reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0);
+ memset(&parms, 0, sizeof(parms));
+ if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
+ err = set_role_parms_from_attrs(&parms, info);
+ if (err) {
+ retcode = ERR_MANDATORY_TAG;
+ drbd_msg_put_info(from_attrs_err_to_txt(err));
+ goto out;
+ }
+ }
+ if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
+ retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate);
+ else
+ retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0);
+out:
+ drbd_adm_finish(info, retcode);
return 0;
}
@@ -514,7 +702,12 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev)
{
sector_t md_size_sect = 0;
- switch (bdev->dc.meta_dev_idx) {
+ int meta_dev_idx;
+
+ rcu_read_lock();
+ meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+
+ switch (meta_dev_idx) {
default:
/* v07 style fixed size indexed meta data */
bdev->md.md_size_sect = MD_RESERVED_SECT;
@@ -533,7 +726,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
case DRBD_MD_INDEX_FLEX_INT:
bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
/* al size is still fixed */
- bdev->md.al_offset = -MD_AL_MAX_SIZE;
+ bdev->md.al_offset = -MD_AL_SECTORS;
/* we need (slightly less than) ~ this much bitmap sectors: */
md_size_sect = drbd_get_capacity(bdev->backing_bdev);
md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
@@ -549,6 +742,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET;
break;
}
+ rcu_read_unlock();
}
/* input size is expected to be in KB */
@@ -581,17 +775,23 @@ char *ppsize(char *buf, unsigned long long size)
* R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
* peer may not initiate a resize.
*/
+/* Note these are not to be confused with
+ * drbd_adm_suspend_io/drbd_adm_resume_io,
+ * which are (sub) state changes triggered by admin (drbdsetup),
+ * and can be long lived.
+ * This changes an mdev->flag, is triggered by drbd internals,
+ * and should be short-lived. */
void drbd_suspend_io(struct drbd_conf *mdev)
{
- drbd_set_flag(mdev, SUSPEND_IO);
- if (is_susp(mdev->state))
+ set_bit(SUSPEND_IO, &mdev->flags);
+ if (drbd_suspended(mdev))
return;
wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
}
void drbd_resume_io(struct drbd_conf *mdev)
{
- drbd_clear_flag(mdev, SUSPEND_IO);
+ clear_bit(SUSPEND_IO, &mdev->flags);
wake_up(&mdev->misc_wait);
}
@@ -605,7 +805,7 @@ void drbd_resume_io(struct drbd_conf *mdev)
enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
{
sector_t prev_first_sect, prev_size; /* previous meta location */
- sector_t la_size;
+ sector_t la_size, u_size;
sector_t size;
char ppb[10];
@@ -633,7 +833,10 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
/* TODO: should only be some assert here, not (re)init... */
drbd_md_set_sector_offsets(mdev, mdev->ldev);
- size = drbd_new_dev_size(mdev, mdev->ldev, flags & DDSF_FORCED);
+ rcu_read_lock();
+ u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
+ rcu_read_unlock();
+ size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
if (drbd_get_capacity(mdev->this_bdev) != size ||
drbd_bm_capacity(mdev) != size) {
@@ -696,12 +899,12 @@ out:
}
sector_t
-drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space)
+drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
+ sector_t u_size, int assume_peer_has_space)
{
sector_t p_size = mdev->p_size; /* partner's disk size. */
sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
sector_t m_size; /* my size */
- sector_t u_size = bdev->dc.disk_size; /* size requested by user. */
sector_t size = 0;
m_size = drbd_get_max_capacity(bdev);
@@ -750,24 +953,21 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int ass
* failed, and 0 on success. You should call drbd_md_sync() after you called
* this function.
*/
-static int drbd_check_al_size(struct drbd_conf *mdev)
+static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc)
{
struct lru_cache *n, *t;
struct lc_element *e;
unsigned int in_use;
int i;
- ERR_IF(mdev->sync_conf.al_extents < 7)
- mdev->sync_conf.al_extents = 127;
-
if (mdev->act_log &&
- mdev->act_log->nr_elements == mdev->sync_conf.al_extents)
+ mdev->act_log->nr_elements == dc->al_extents)
return 0;
in_use = 0;
t = mdev->act_log;
- n = lc_create("act_log", drbd_al_ext_cache,
- mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
+ n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
+ dc->al_extents, sizeof(struct lc_element), 0);
if (n == NULL) {
dev_err(DEV, "Cannot allocate act_log lru!\n");
@@ -808,7 +1008,9 @@ static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_
struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
- max_segments = mdev->ldev->dc.max_bio_bvecs;
+ rcu_read_lock();
+ max_segments = rcu_dereference(mdev->ldev->disk_conf)->max_bio_bvecs;
+ rcu_read_unlock();
put_ldev(mdev);
}
@@ -852,12 +1054,14 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
Because new from 8.3.8 onwards the peer can use multiple
BIOs for a single peer_request */
if (mdev->state.conn >= C_CONNECTED) {
- if (mdev->agreed_pro_version < 94) {
- peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+ if (mdev->tconn->agreed_pro_version < 94)
+ peer = min( mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
- } else if (mdev->agreed_pro_version == 94)
+ else if (mdev->tconn->agreed_pro_version == 94)
peer = DRBD_MAX_SIZE_H80_PACKET;
- else /* drbd 8.3.8 onwards */
+ else if (mdev->tconn->agreed_pro_version < 100)
+ peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */
+ else
peer = DRBD_MAX_BIO_SIZE;
}
@@ -872,36 +1076,27 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
dr