about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-30 11:17:19 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-30 11:17:19 -0700
commit: af56e0aa35f3ae2a4c1a6d1000702df1dd78cb76 (patch)
tree: 304bd85e5db2d07efa2913aa7c6313b918cfbfdb
parent: 65a50c951a38e9827dd9655b6e686bde912e799b (diff)
parent: 6bd9adbdf9ca6a052b0b7455ac67b925eb38cfad (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull ceph updates from Sage Weil:
 "There are some updates and cleanups to the CRUSH placement code, a bug
  fix with incremental maps, several cleanups and fixes from Josh Durgin
  in the RBD block device code, a series of cleanups and bug fixes from
  Alex Elder in the messenger code, and some miscellaneous bounds
  checking and gfp cleanups/fixes."

Fix up trivial conflicts in net/ceph/{messenger.c,osdmap.c} due to the
networking people preferring "unsigned int" over just "unsigned".

* git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (45 commits)
  libceph: fix pg_temp updates
  libceph: avoid unregistering osd request when not registered
  ceph: add auth buf in prepare_write_connect()
  ceph: rename prepare_connect_authorizer()
  ceph: return pointer from prepare_connect_authorizer()
  ceph: use info returned by get_authorizer
  ceph: have get_authorizer methods return pointers
  ceph: ensure auth ops are defined before use
  ceph: messenger: reduce args to create_authorizer
  ceph: define ceph_auth_handshake type
  ceph: messenger: check return from get_authorizer
  ceph: messenger: rework prepare_connect_authorizer()
  ceph: messenger: check prepare_write_connect() result
  ceph: don't set WRITE_PENDING too early
  ceph: drop msgr argument from prepare_write_connect()
  ceph: messenger: send banner in process_connect()
  ceph: messenger: reset connection kvec caller
  libceph: don't reset kvec in prepare_write_banner()
  ceph: ignore preferred_osd field
  ceph: fully initialize new layout
  ...
-rw-r--r-- Documentation/ABI/testing/sysfs-bus-rbd | 4
-rw-r--r-- drivers/block/rbd.c | 72
-rw-r--r-- fs/ceph/file.c | 1
-rw-r--r-- fs/ceph/ioctl.c | 102
-rw-r--r-- fs/ceph/ioctl.h | 2
-rw-r--r-- fs/ceph/mds_client.c | 54
-rw-r--r-- fs/ceph/mds_client.h | 5
-rw-r--r-- fs/ceph/xattr.c | 9
-rw-r--r-- include/linux/ceph/auth.h | 12
-rw-r--r-- include/linux/ceph/ceph_fs.h | 4
-rw-r--r-- include/linux/ceph/decode.h | 9
-rw-r--r-- include/linux/ceph/messenger.h | 6
-rw-r--r-- include/linux/ceph/osd_client.h | 11
-rw-r--r-- include/linux/ceph/osdmap.h | 2
-rw-r--r-- include/linux/crush/crush.h | 18
-rw-r--r-- include/linux/crush/mapper.h | 7
-rw-r--r-- net/ceph/auth_none.c | 15
-rw-r--r-- net/ceph/auth_x.c | 15
-rw-r--r-- net/ceph/crush/crush.c | 39
-rw-r--r-- net/ceph/crush/mapper.c | 124
-rw-r--r-- net/ceph/messenger.c | 182
-rw-r--r-- net/ceph/osd_client.c | 63
-rw-r--r-- net/ceph/osdmap.c | 73
23 files changed, 376 insertions, 453 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd
index dbedafb095e..bcd88eb7ebc 100644
--- a/Documentation/ABI/testing/sysfs-bus-rbd
+++ b/Documentation/ABI/testing/sysfs-bus-rbd
@@ -65,11 +65,11 @@ snap_*
Entries under /sys/bus/rbd/devices/<dev-id>/snap_<snap-name>
-------------------------------------------------------------
-id
+snap_id
The rados internal snapshot id assigned for this snapshot
-size
+snap_size
The size of the image when this snapshot was taken.
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 013c7a549fb..65665c9c42c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -141,7 +141,7 @@ struct rbd_request {
struct rbd_snap {
struct device dev;
const char *name;
- size_t size;
+ u64 size;
struct list_head node;
u64 id;
};
@@ -175,8 +175,7 @@ struct rbd_device {
/* protects updating the header */
struct rw_semaphore header_rwsem;
char snap_name[RBD_MAX_SNAP_NAME_LEN];
- u32 cur_snap; /* index+1 of current snapshot within snap context
- 0 - for the head */
+ u64 snap_id; /* current snapshot id */
int read_only;
struct list_head node;
@@ -241,7 +240,7 @@ static void rbd_put_dev(struct rbd_device *rbd_dev)
put_device(&rbd_dev->dev);
}
-static int __rbd_update_snaps(struct rbd_device *rbd_dev);
+static int __rbd_refresh_header(struct rbd_device *rbd_dev);
static int rbd_open(struct block_device *bdev, fmode_t mode)
{
@@ -450,7 +449,9 @@ static void rbd_client_release(struct kref *kref)
struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
dout("rbd_release_client %p\n", rbdc);
+ spin_lock(&rbd_client_list_lock);
list_del(&rbdc->node);
+ spin_unlock(&rbd_client_list_lock);
ceph_destroy_client(rbdc->client);
kfree(rbdc->rbd_opts);
@@ -463,9 +464,7 @@ static void rbd_client_release(struct kref *kref)
*/
static void rbd_put_client(struct rbd_device *rbd_dev)
{
- spin_lock(&rbd_client_list_lock);
kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
- spin_unlock(&rbd_client_list_lock);
rbd_dev->rbd_client = NULL;
}
@@ -487,16 +486,18 @@ static void rbd_coll_release(struct kref *kref)
*/
static int rbd_header_from_disk(struct rbd_image_header *header,
struct rbd_image_header_ondisk *ondisk,
- int allocated_snaps,
+ u32 allocated_snaps,
gfp_t gfp_flags)
{
- int i;
- u32 snap_count;
+ u32 i, snap_count;
if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT)))
return -ENXIO;
snap_count = le32_to_cpu(ondisk->snap_count);
+ if (snap_count > (UINT_MAX - sizeof(struct ceph_snap_context))
+ / sizeof (*ondisk))
+ return -EINVAL;
header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
snap_count * sizeof (*ondisk),
gfp_flags);
@@ -506,11 +507,11 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
if (snap_count) {
header->snap_names = kmalloc(header->snap_names_len,
- GFP_KERNEL);
+ gfp_flags);
if (!header->snap_names)
goto err_snapc;
header->snap_sizes = kmalloc(snap_count * sizeof(u64),
- GFP_KERNEL);
+ gfp_flags);
if (!header->snap_sizes)
goto err_names;
} else {
@@ -552,21 +553,6 @@ err_snapc:
return -ENOMEM;
}
-static int snap_index(struct rbd_image_header *header, int snap_num)
-{
- return header->total_snaps - snap_num;
-}
-
-static u64 cur_snap_id(struct rbd_device *rbd_dev)
-{
- struct rbd_image_header *header = &rbd_dev->header;
-
- if (!rbd_dev->cur_snap)
- return 0;
-
- return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
-}
-
static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
u64 *seq, u64 *size)
{
@@ -605,7 +591,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
snapc->seq = header->snap_seq;
else
snapc->seq = 0;
- dev->cur_snap = 0;
+ dev->snap_id = CEPH_NOSNAP;
dev->read_only = 0;
if (size)
*size = header->image_size;
@@ -613,8 +599,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
if (ret < 0)
goto done;
-
- dev->cur_snap = header->total_snaps - ret;
+ dev->snap_id = snapc->seq;
dev->read_only = 1;
}
@@ -935,7 +920,6 @@ static int rbd_do_request(struct request *rq,
layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
layout->fl_stripe_count = cpu_to_le32(1);
layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
- layout->fl_pg_preferred = cpu_to_le32(-1);
layout->fl_pg_pool = cpu_to_le32(dev->poolid);
ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
req, ops);
@@ -1168,7 +1152,7 @@ static int rbd_req_read(struct request *rq,
int coll_index)
{
return rbd_do_op(rq, rbd_dev, NULL,
- (snapid ? snapid : CEPH_NOSNAP),
+ snapid,
CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ,
2,
@@ -1187,7 +1171,7 @@ static int rbd_req_sync_read(struct rbd_device *dev,
u64 *ver)
{
return rbd_req_sync_op(dev, NULL,
- (snapid ? snapid : CEPH_NOSNAP),
+ snapid,
CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ,
NULL,
@@ -1238,7 +1222,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
notify_id, (int)opcode);
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rc = __rbd_update_snaps(dev);
+ rc = __rbd_refresh_header(dev);
mutex_unlock(&ctl_mutex);
if (rc)
pr_warning(RBD_DRV_NAME "%d got notification but failed to "
@@ -1521,7 +1505,7 @@ static void rbd_rq_fn(struct request_queue *q)
coll, cur_seg);
else
rbd_req_read(rq, rbd_dev,
- cur_snap_id(rbd_dev),
+ rbd_dev->snap_id,
ofs,
op_size, bio,
coll, cur_seg);
@@ -1592,7 +1576,7 @@ static int rbd_read_header(struct rbd_device *rbd_dev,
{
ssize_t rc;
struct rbd_image_header_ondisk *dh;
- int snap_count = 0;
+ u32 snap_count = 0;
u64 ver;
size_t len;
@@ -1656,7 +1640,7 @@ static int rbd_header_add_snap(struct rbd_device *dev,
struct ceph_mon_client *monc;
/* we should create a snapshot only if we're pointing at the head */
- if (dev->cur_snap)
+ if (dev->snap_id != CEPH_NOSNAP)
return -EINVAL;
monc = &dev->rbd_client->client->monc;
@@ -1683,7 +1667,9 @@ static int rbd_header_add_snap(struct rbd_device *dev,
if (ret < 0)
return ret;
- dev->header.snapc->seq = new_snapid;
+ down_write(&dev->header_rwsem);
+ dev->header.snapc->seq = new_snapid;
+ up_write(&dev->header_rwsem);
return 0;
bad:
@@ -1703,7 +1689,7 @@ static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
/*
* only read the first part of the ondisk header, without the snaps info
*/
-static int __rbd_update_snaps(struct rbd_device *rbd_dev)
+static int __rbd_refresh_header(struct rbd_device *rbd_dev)
{
int ret;
struct rbd_image_header h;
@@ -1890,7 +1876,7 @@ static ssize_t rbd_image_refresh(struct device *dev,
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rc = __rbd_update_snaps(rbd_dev);
+ rc = __rbd_refresh_header(rbd_dev);
if (rc < 0)
ret = rc;
@@ -1949,7 +1935,7 @@ static ssize_t rbd_snap_size_show(struct device *dev,
{
struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
- return sprintf(buf, "%zd\n", snap->size);
+ return sprintf(buf, "%llu\n", (unsigned long long)snap->size);
}
static ssize_t rbd_snap_id_show(struct device *dev,
@@ -1958,7 +1944,7 @@ static ssize_t rbd_snap_id_show(struct device *dev,
{
struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
- return sprintf(buf, "%llu\n", (unsigned long long) snap->id);
+ return sprintf(buf, "%llu\n", (unsigned long long)snap->id);
}
static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
@@ -2173,7 +2159,7 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
rbd_dev->header.obj_version);
if (ret == -ERANGE) {
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rc = __rbd_update_snaps(rbd_dev);
+ rc = __rbd_refresh_header(rbd_dev);
mutex_unlock(&ctl_mutex);
if (rc < 0)
return rc;
@@ -2558,7 +2544,7 @@ static ssize_t rbd_snap_add(struct device *dev,
if (ret < 0)
goto err_unlock;
- ret = __rbd_update_snaps(rbd_dev);
+ ret = __rbd_refresh_header(rbd_dev);
if (ret < 0)
goto err_unlock;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ed72428d9c7..988d4f302e4 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -54,7 +54,6 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode)
req->r_fmode = ceph_flags_to_mode(flags);
req->r_args.open.flags = cpu_to_le32(flags);
req->r_args.open.mode = cpu_to_le32(create_mode);
- req->r_args.open.preferred = cpu_to_le32(-1);
out:
return req;
}
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 790914a598d..8e3fb69fbe6 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -26,8 +26,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
l.object_size = ceph_file_layout_object_size(ci->i_layout);
l.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
- l.preferred_osd =
- (s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
+ l.preferred_osd = (s32)-1;
if (copy_to_user(arg, &l, sizeof(l)))
return -EFAULT;
}
@@ -35,6 +34,32 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
return err;
}
+static long __validate_layout(struct ceph_mds_client *mdsc,
+ struct ceph_ioctl_layout *l)
+{
+ int i, err;
+
+ /* validate striping parameters */
+ if ((l->object_size & ~PAGE_MASK) ||
+ (l->stripe_unit & ~PAGE_MASK) ||
+ ((unsigned)l->object_size % (unsigned)l->stripe_unit))
+ return -EINVAL;
+
+ /* make sure it's a valid data pool */
+ mutex_lock(&mdsc->mutex);
+ err = -EINVAL;
+ for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
+ if (mdsc->mdsmap->m_data_pg_pools[i] == l->data_pool) {
+ err = 0;
+ break;
+ }
+ mutex_unlock(&mdsc->mutex);
+ if (err)
+ return err;
+
+ return 0;
+}
+
static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
{
struct inode *inode = file->f_dentry->d_inode;
@@ -44,52 +69,40 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
struct ceph_ioctl_layout l;
struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode);
struct ceph_ioctl_layout nl;
- int err, i;
+ int err;
if (copy_from_user(&l, arg, sizeof(l)))
return -EFAULT;
/* validate changed params against current layout */
err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
- if (!err) {
- nl.stripe_unit = ceph_file_layout_su(ci->i_layout);
- nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
- nl.object_size = ceph_file_layout_object_size(ci->i_layout);
- nl.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
- nl.preferred_osd =
- (s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
- } else
+ if (err)
return err;
+ memset(&nl, 0, sizeof(nl));
if (l.stripe_count)
nl.stripe_count = l.stripe_count;
+ else
+ nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
if (l.stripe_unit)
nl.stripe_unit = l.stripe_unit;
+ else
+ nl.stripe_unit = ceph_file_layout_su(ci->i_layout);
if (l.object_size)
nl.object_size = l.object_size;
+ else
+ nl.object_size = ceph_file_layout_object_size(ci->i_layout);
if (l.data_pool)
nl.data_pool = l.data_pool;
- if (l.preferred_osd)
- nl.preferred_osd = l.preferred_osd;
+ else
+ nl.data_pool = ceph_file_layout_pg_pool(ci->i_layout);
- if ((nl.object_size & ~PAGE_MASK) ||
- (nl.stripe_unit & ~PAGE_MASK) ||
- ((unsigned)nl.object_size % (unsigned)nl.stripe_unit))
- return -EINVAL;
+ /* this is obsolete, and always -1 */
+ nl.preferred_osd = le64_to_cpu(-1);
- /* make sure it's a valid data pool */
- if (l.data_pool > 0) {
- mutex_lock(&mdsc->mutex);
- err = -EINVAL;
- for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
- if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) {
- err = 0;
- break;
- }
- mutex_unlock(&mdsc->mutex);
- if (err)
- return err;
- }
+ err = __validate_layout(mdsc, &nl);
+ if (err)
+ return err;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETLAYOUT,
USE_AUTH_MDS);
@@ -106,8 +119,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
req->r_args.setlayout.layout.fl_object_size =
cpu_to_le32(l.object_size);
req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
- req->r_args.setlayout.layout.fl_pg_preferred =
- cpu_to_le32(l.preferred_osd);
parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
@@ -127,33 +138,16 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
struct inode *inode = file->f_dentry->d_inode;
struct ceph_mds_request *req;
struct ceph_ioctl_layout l;
- int err, i;
+ int err;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
/* copy and validate */
if (copy_from_user(&l, arg, sizeof(l)))
return -EFAULT;
- if ((l.object_size & ~PAGE_MASK) ||
- (l.stripe_unit & ~PAGE_MASK) ||
- !l.stripe_unit ||
- (l.object_size &&
- (unsigned)l.object_size % (unsigned)l.stripe_unit))
- return -EINVAL;
-
- /* make sure it's a valid data pool */
- if (l.data_pool > 0) {
- mutex_lock(&mdsc->mutex);
- err = -EINVAL;
- for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
- if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) {
- err = 0;
- break;
- }
- mutex_unlock(&mdsc->mutex);
- if (err)
- return err;
- }
+ err = __validate_layout(mdsc, &l);
+ if (err)
+ return err;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT,
USE_AUTH_MDS);
@@ -171,8 +165,6 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
cpu_to_le32(l.object_size);
req->r_args.setlayout.layout.fl_pg_pool =
cpu_to_le32(l.data_pool);
- req->r_args.setlayout.layout.fl_pg_preferred =
- cpu_to_le32(l.preferred_osd);
err = ceph_mdsc_do_request(mdsc, inode, req);
ceph_mdsc_put_request(req);
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index be4a6048733..c77028afb1e 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -34,6 +34,8 @@
struct ceph_ioctl_layout {
__u64 stripe_unit, stripe_count, object_size;
__u64 data_pool;
+
+ /* obsolete. new values ignored, always return -1 */
__s64 preferred_osd;
};
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 89971e137aa..200bc87eceb 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -334,10 +334,10 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
dout("mdsc put_session %p %d -> %d\n", s,
atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
if (atomic_dec_and_test(&s->s_ref)) {
- if (s->s_authorizer)
+ if (s->s_auth.authorizer)
s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
s->s_mdsc->fsc->client->monc.auth,
- s->s_authorizer);
+ s->s_auth.authorizer);
kfree(s);
}
}
@@ -3395,39 +3395,33 @@ out:
/*
* authentication
*/
-static int get_authorizer(struct ceph_connection *con,
- void **buf, int *len, int *proto,
- void **reply_buf, int *reply_len, int force_new)
+
+/*
+ * Note: returned pointer is the address of a structure that's
+ * managed separately. Caller must *not* attempt to free it.
+ */
+static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
+ int *proto, int force_new)
{
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
- int ret = 0;
-
- if (force_new && s->s_authorizer) {
- ac->ops->destroy_authorizer(ac, s->s_authorizer);
- s->s_authorizer = NULL;
- }
- if (s->s_authorizer == NULL) {
- if (ac->ops->create_authorizer) {
- ret = ac->ops->create_authorizer(
- ac, CEPH_ENTITY_TYPE_MDS,
- &s->s_authorizer,
- &s->s_authorizer_buf,
- &s->s_authorizer_buf_len,
- &s->s_authorizer_reply_buf,
- &s->s_authorizer_reply_buf_len);
- if (ret)
- return ret;
- }
- }
+ struct ceph_auth_handshake *auth = &s->s_auth;
+ if (force_new && auth->authorizer) {
+ if (ac->ops && ac->ops->destroy_authorizer)
+ ac->ops->destroy_authorizer(ac, auth->authorizer);
+ auth->authorizer = NULL;
+ }
+ if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
+ int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
+ auth);
+ if (ret)
+ return ERR_PTR(ret);
+ }
*proto = ac->protocol;
- *buf = s->s_authorizer_buf;
- *len = s->s_authorizer_buf_len;
- *reply_buf = s->s_authorizer_reply_buf;
- *reply_len = s->s_authorizer_reply_buf_len;
- return 0;
+
+ return auth;
}
@@ -3437,7 +3431,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
- return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len);
+ return ac->ops->verify_authorizer_reply(ac, s->s_auth.authorizer, len);
}
static int invalidate_authorizer(struct ceph_connection *con)
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 8c7c04ebb59..dd26846dd71 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -11,6 +11,7 @@
#include <linux/ceph/types.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/mdsmap.h>
+#include <linux/ceph/auth.h>
/*
* Some lock dependencies:
@@ -113,9 +114,7 @@ struct ceph_mds_session {
struct ceph_connection s_con;
- struct ceph_authorizer *s_authorizer;
- void *s_authorizer_buf, *s_authorizer_reply_buf;
- size_t s_authorizer_buf_len, s_authorizer_reply_buf_len;
+ struct ceph_auth_handshake s_auth;
/* protected by s_gen_ttl_lock */
spinlock_t s_gen_ttl_lock;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 35b86331d8a..785cb3057c9 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -118,15 +118,6 @@ static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val,
(unsigned long long)ceph_file_layout_su(ci->i_layout),
(unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
(unsigned long long)ceph_file_layout_object_size(ci->i_layout));
-
- if (ceph_file_layout_pg_preferred(ci->i_layout) >= 0) {
- val += ret;
- size -= ret;
- ret += snprintf(val, size, "preferred_osd=%lld\n",
- (unsigned long long)ceph_file_layout_pg_preferred(
- ci->i_layout));
- }
-
return ret;
}
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index aa13392a7ef..d4080f309b5 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -14,6 +14,14 @@
struct ceph_auth_client;
struct ceph_authorizer;
+struct ceph_auth_handshake {
+ struct ceph_authorizer *authorizer;
+ void *authorizer_buf;
+ size_t authorizer_buf_len;
+ void *authorizer_reply_buf;
+ size_t authorizer_reply_buf_len;
+};
+
struct ceph_auth_client_ops {
const char *name;
@@ -43,9 +51,7 @@ struct ceph_auth_client_ops {
* the response to authenticate the service.
*/
int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
- struct ceph_authorizer **a,
- void **buf, size_t *len,
- void **reply_buf, size_t *reply_len);
+ struct ceph_auth_handshake *auth);
int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
struct ceph_authorizer *a, size_t len);
void (*destroy_authorizer)(struct ceph_auth_client *ac,
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index b8c60694b2b..e81ab30d489 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -65,7 +65,7 @@ struct ceph_file_layout {
__le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */
/* object -> pg layout */
- __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
+ __le32 fl_unused; /* unused; used to be preferred primary (-1) */
__le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
} __attribute__ ((packed));
@@ -384,7 +384,7 @@ union ceph_mds_request_args {
__le32 stripe_count; /* ... */
__le32 object_size;
__le32 file_replication;
- __le32 preferred;
+ __le32 unused; /* used to be preferred osd */
} __attribute__ ((packed)) open;
struct {
__le32 flags;
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 220ae21e819..d8615dee580 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -46,9 +46,14 @@ static inline void ceph_decode_copy(void **p, void *pv, size_t n)
/*
* bounds check input.
*/
+static inline int ceph_has_room(void **p, void *end, size_t n)
+{
+ return end >= *p && n <= end - *p;
+}
+
#define ceph_decode_need(p, end, n, bad) \
do { \
- if (unlikely(*(p) + (n) > (end))) \
+ if (!likely(ceph_has_room(p, end, n))) \
goto bad; \
} while (0)
@@ -167,7 +172,7 @@ static inline void ceph_encode_string(void **p, void *end,
#define ceph_encode_need(p, end, n, bad) \
do { \
- if (unlikely(*(p) + (n) > (end))) \
+ if (!likely(ceph_has_room(p, end, n))) \
goto bad; \
} while (0)
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 3bff047f6b0..2521a95fa6d 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -25,9 +25,9 @@ struct ceph_connection_operations {
void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
/* authorize an outgoing connection */
- int (*get_authorizer) (struct ceph_connection *con,
- void **buf, int *len, int *proto,
- void **reply_buf, int *reply_len, int force_new);
+ struct ceph_auth_handshake *(*get_authorizer) (
+ struct ceph_connection *con,
+ int *proto, int force_new);
int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
int (*invalidate_authorizer)(struct ceph_connection *con);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 7c05ac202d9..cedfb1a8434 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -6,9 +6,10 @@
#include <linux/mempool.h>
#include <linux/rbtree.h>
-#include "types.h"
-#include "osdmap.h"
-#include "messenger.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/osdmap.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/auth.h>
/*
* Maximum object name size
@@ -40,9 +41,7 @@ struct ceph_osd {
struct list_head o_requests;
struct list_head o_linger_requests;
struct list_head o_osd_lru;
- struct ceph_authorizer *o_authorizer;
- void *o_authorizer_buf, *o_authorizer_reply_buf;
- size_t o_authorizer_buf_len, o_authorizer_reply_buf_len;
+ struct ceph_auth_handshake o_auth;
unsigned long lru_ttl;
int o_marked_for_keepalive;
struct list_head o_keepalive_item;
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index ba4c205cbb0..311ef8d6aa9 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -65,8 +65,6 @@ struct ceph_osdmap {
#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
#define ceph_file_layout_object_su(l) \
((__s32)le32_to_cpu((l).fl_object_stripe_unit))
-#define ceph_file_layout_pg_preferred(l) \
- ((__s32)le32_to_cpu((l).fl_pg_preferred))
#define ceph_file_layout_pg_pool(l) \
((__s32)le32_to_cpu((l).fl_pg_pool))
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 97e435b191f..7c4750811b9 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -151,16 +151,6 @@ struct crush_map {
struct crush_bucket **buckets;
struct crush_rule **rules;
- /*
- * Parent pointers to identify the parent bucket a device or
- * bucket in the hierarchy. If an item appears more than
- * once, this is the _last_ time it appeared (where buckets
- * are processed in bucket id order, from -1 on down to
- * -max_buckets.
- */
- __u32 *bucket_parents;
- __u32 *device_parents;
-
__s32 max_buckets;
__u32 max_rules;
__s32 max_devices;
@@ -168,8 +158,7 @@ struct crush_map {
/* crush.c */
-extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos);
-extern void crush_calc_parents(struct crush_map *map);
+extern int crush_get_bucket_item_weight(const struct crush_bucket *b, int pos);
extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
@@ -177,4 +166,9 @@ extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
extern void crush_destroy_bucket(struct crush_bucket *b);
extern void crush_destroy(struct crush_map *map);
+static inline int crush_calc_tree_node(int i)
+{
+ return ((i+1) << 1)-1;
+}
+
#endif
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index c46b99c18bb..71d79f44a7d 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -10,11 +10,10 @@
#include "crush.h"
-extern int crush_find_rule(struct crush_map *map, int pool, int type, int size);
-extern int crush_do_rule(struct crush_map *map,
+extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
+extern int crush_do_rule(const struct crush_map *map,
int ruleno,
int x, int *result, int result_max,
- int forcefeed, /* -1 for none */
- __u32 *weights);
+ const __u32 *weights);
#endif
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 214c2bb43d6..925ca583c09 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -59,9 +59,7 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
*/
static int ceph_auth_none_create_authorizer(
struct ceph_auth_client *ac, int peer_type,
- struct ceph_authorizer **a,
- void **buf, size_t *len,
- void **reply_buf, size_t *reply_len)
+ struct ceph_auth_handshake *auth)
{
struct ceph_auth_none_info *ai = ac->private;
struct ceph_none_authorizer *au = &ai->au;
@@ -82,11 +80,12 @@ static int ceph_auth_none_create_authorizer(
dout("built authorizer len %d\n", au->buf_len);
}
- *a = (struct ceph_authorizer *)au;
- *buf = au->buf;
- *len = au->buf_len;
- *reply_buf = au->reply_buf