From 305a47b17c6efcc0e7b67b0bd41e2c12b7af758b Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 16 Jan 2009 16:21:12 +0000 Subject: dlm: Change rwlock which is only used in write mode to a spinlock The ls_dirtbl[].lock was an rwlock, but since it was only used in write mode a spinlock will suffice. Signed-off-by: Steven Whitehouse Signed-off-by: David Teigland --- fs/dlm/dir.c | 18 +++++++++--------- fs/dlm/dlm_internal.h | 2 +- fs/dlm/lockspace.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 92969f879a1..858fba14aaa 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -156,7 +156,7 @@ void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen bucket = dir_hash(ls, name, namelen); - write_lock(&ls->ls_dirtbl[bucket].lock); + spin_lock(&ls->ls_dirtbl[bucket].lock); de = search_bucket(ls, name, namelen, bucket); @@ -173,7 +173,7 @@ void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen list_del(&de->list); kfree(de); out: - write_unlock(&ls->ls_dirtbl[bucket].lock); + spin_unlock(&ls->ls_dirtbl[bucket].lock); } void dlm_dir_clear(struct dlm_ls *ls) @@ -185,14 +185,14 @@ void dlm_dir_clear(struct dlm_ls *ls) DLM_ASSERT(list_empty(&ls->ls_recover_list), ); for (i = 0; i < ls->ls_dirtbl_size; i++) { - write_lock(&ls->ls_dirtbl[i].lock); + spin_lock(&ls->ls_dirtbl[i].lock); head = &ls->ls_dirtbl[i].list; while (!list_empty(head)) { de = list_entry(head->next, struct dlm_direntry, list); list_del(&de->list); put_free_de(ls, de); } - write_unlock(&ls->ls_dirtbl[i].lock); + spin_unlock(&ls->ls_dirtbl[i].lock); } } @@ -307,17 +307,17 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, bucket = dir_hash(ls, name, namelen); - write_lock(&ls->ls_dirtbl[bucket].lock); + spin_lock(&ls->ls_dirtbl[bucket].lock); de = search_bucket(ls, name, namelen, bucket); if (de) { *r_nodeid = de->master_nodeid; - write_unlock(&ls->ls_dirtbl[bucket].lock); + spin_unlock(&ls->ls_dirtbl[bucket].lock); if (*r_nodeid == nodeid) return -EEXIST; return 0; } - write_unlock(&ls->ls_dirtbl[bucket].lock); + spin_unlock(&ls->ls_dirtbl[bucket].lock); if (namelen > DLM_RESNAME_MAXLEN) return -EINVAL; @@ -330,7 +330,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, de->length = namelen; memcpy(de->name, name, namelen); - write_lock(&ls->ls_dirtbl[bucket].lock); + spin_lock(&ls->ls_dirtbl[bucket].lock); tmp = search_bucket(ls, name, namelen, bucket); if (tmp) { kfree(de); @@ -339,7 +339,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); } *r_nodeid = de->master_nodeid; - write_unlock(&ls->ls_dirtbl[bucket].lock); + spin_unlock(&ls->ls_dirtbl[bucket].lock); return 0; } diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 076e86f38bc..d01ca0a711d 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -99,7 +99,7 @@ struct dlm_direntry { struct dlm_dirtable { struct list_head list; - rwlock_t lock; + spinlock_t lock; }; struct dlm_rsbtable { diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index aa32e5f0249..cd8e2df3c29 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -487,7 +487,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, goto out_lkbfree; for (i = 0; i < size; i++) { INIT_LIST_HEAD(&ls->ls_dirtbl[i].list); - rwlock_init(&ls->ls_dirtbl[i].lock); + spin_lock_init(&ls->ls_dirtbl[i].lock); } INIT_LIST_HEAD(&ls->ls_waiters); -- cgit v1.2.3-18-g5258 From 44ad532b3277f0cae55bfe0625d3140cf73af450 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 22 Jan 2009 13:24:49 -0800 Subject: dlm: use ipv6_addr_copy Signed-off-by: Joe Perches Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 103a5ebd137..bf09262b8b0 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -53,6 +53,7 @@ #include #include #include +#include #include "dlm_internal.h" #include "lowcomms.h" @@ -250,8 +251,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) } else { struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; - memcpy(&ret6->sin6_addr, &in6->sin6_addr, - sizeof(in6->sin6_addr)); + ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr); } return 0; -- cgit v1.2.3-18-g5258 From 2cf12c0bf261e19d9641d7b8aa220e2651a03289 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 22 Jan 2009 13:26:47 -0800 Subject: dlm: comment typo fixes Signed-off-by: Joe Perches Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index bf09262b8b0..982314c472c 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -21,7 +21,7 @@ * * Cluster nodes are referred to by their nodeids. nodeids are * simply 32 bit numbers to the locking module - if they need to - * be expanded for the cluster infrastructure then that is it's + * be expanded for the cluster infrastructure then that is its * responsibility. It is this layer's * responsibility to resolve these into IP address or * whatever it needs for inter-node communication. @@ -36,9 +36,9 @@ * of high load. Also, this way, the sending thread can collect together * messages bound for one node and send them in one block. * - * lowcomms will choose to use wither TCP or SCTP as its transport layer + * lowcomms will choose to use either TCP or SCTP as its transport layer * depending on the configuration variable 'protocol'. This should be set - * to 0 (default) for TCP or 1 for SCTP. It shouldbe configured using a + * to 0 (default) for TCP or 1 for SCTP. It should be configured using a * cluster-wide mechanism as it must be the same on all nodes of the cluster * for the DLM to function. * -- cgit v1.2.3-18-g5258 From 5e9ccc372dc855900c4a75b21286038938e288c7 Mon Sep 17 00:00:00 2001 From: Christine Caulfield Date: Wed, 28 Jan 2009 12:57:40 -0600 Subject: dlm: replace idr with hash table for connections Integer nodeids can be too large for the idr code; use a hash table instead. Signed-off-by: Christine Caulfield Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 171 +++++++++++++++++++++++++++++------------------------- 1 file changed, 92 insertions(+), 79 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 982314c472c..609108a8326 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include @@ -61,6 +60,7 @@ #include "config.h" #define NEEDED_RMEM (4*1024*1024) +#define CONN_HASH_SIZE 32 struct cbuf { unsigned int base; @@ -115,6 +115,7 @@ struct connection { int retries; #define MAX_CONNECT_RETRIES 3 int sctp_assoc; + struct hlist_node list; struct connection *othercon; struct work_struct rwork; /* Receive workqueue */ struct work_struct swork; /* Send workqueue */ @@ -139,14 +140,37 @@ static int dlm_local_count; static struct workqueue_struct *recv_workqueue; static struct workqueue_struct *send_workqueue; -static DEFINE_IDR(connections_idr); +static struct hlist_head connection_hash[CONN_HASH_SIZE]; static DEFINE_MUTEX(connections_lock); -static int max_nodeid; static struct kmem_cache *con_cache; static void process_recv_sockets(struct work_struct *work); static void process_send_sockets(struct work_struct *work); + +/* This is deliberately very simple because most clusters have simple + sequential nodeids, so we should be able to go straight to a connection + struct in the array */ +static inline int nodeid_hash(int nodeid) +{ + return nodeid & (CONN_HASH_SIZE-1); +} + +static struct connection *__find_con(int nodeid) +{ + int r; + struct hlist_node *h; + struct connection *con; + + r = nodeid_hash(nodeid); + + hlist_for_each_entry(con, h, &connection_hash[r], list) { + if (con->nodeid == nodeid) + return con; + } + return NULL; +} + /* * If 'allocation' is zero then we don't attempt to create a new * connection structure for this node. @@ -155,31 +179,17 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) { struct connection *con = NULL; int r; - int n; - con = idr_find(&connections_idr, nodeid); + con = __find_con(nodeid); if (con || !alloc) return con; - r = idr_pre_get(&connections_idr, alloc); - if (!r) - return NULL; - con = kmem_cache_zalloc(con_cache, alloc); if (!con) return NULL; - r = idr_get_new_above(&connections_idr, con, nodeid, &n); - if (r) { - kmem_cache_free(con_cache, con); - return NULL; - } - - if (n != nodeid) { - idr_remove(&connections_idr, n); - kmem_cache_free(con_cache, con); - return NULL; - } + r = nodeid_hash(nodeid); + hlist_add_head(&con->list, &connection_hash[r]); con->nodeid = nodeid; mutex_init(&con->sock_mutex); @@ -190,19 +200,30 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) /* Setup action pointers for child sockets */ if (con->nodeid) { - struct connection *zerocon = idr_find(&connections_idr, 0); + struct connection *zerocon = __find_con(0); con->connect_action = zerocon->connect_action; if (!con->rx_action) con->rx_action = zerocon->rx_action; } - if (nodeid > max_nodeid) - max_nodeid = nodeid; - return con; } +/* Loop round all connections */ +static void foreach_conn(void (*conn_func)(struct connection *c)) +{ + int i; + struct hlist_node *h, *n; + struct connection *con; + + for (i = 0; i < CONN_HASH_SIZE; i++) { + hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){ + conn_func(con); + } + } +} + static struct connection *nodeid2con(int nodeid, gfp_t allocation) { struct connection *con; @@ -218,14 +239,17 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation) static struct connection *assoc2con(int assoc_id) { int i; + struct hlist_node *h; struct connection *con; mutex_lock(&connections_lock); - for (i=0; i<=max_nodeid; i++) { - con = __nodeid2con(i, 0); - if (con && con->sctp_assoc == assoc_id) { - mutex_unlock(&connections_lock); - return con; + + for (i = 0 ; i < CONN_HASH_SIZE; i++) { + hlist_for_each_entry(con, h, &connection_hash[i], list) { + if (con && con->sctp_assoc == assoc_id) { + mutex_unlock(&connections_lock); + return con; + } } } mutex_unlock(&connections_lock); @@ -376,25 +400,23 @@ static void sctp_send_shutdown(sctp_assoc_t associd) log_print("send EOF to node failed: %d", ret); } +static void sctp_init_failed_foreach(struct connection *con) +{ + con->sctp_assoc = 0; + if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { + if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) + queue_work(send_workqueue, &con->swork); + } +} + /* INIT failed but we don't know which node... restart INIT on all pending nodes */ static void sctp_init_failed(void) { - int i; - struct connection *con; - mutex_lock(&connections_lock); - for (i=1; i<=max_nodeid; i++) { - con = __nodeid2con(i, 0); - if (!con) - continue; - con->sctp_assoc = 0; - if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { - if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) { - queue_work(send_workqueue, &con->swork); - } - } - } + + foreach_conn(sctp_init_failed_foreach); + mutex_unlock(&connections_lock); } @@ -1313,13 +1335,10 @@ out_connect: static void clean_one_writequeue(struct connection *con) { - struct list_head *list; - struct list_head *temp; + struct writequeue_entry *e, *safe; spin_lock(&con->writequeue_lock); - list_for_each_safe(list, temp, &con->writequeue) { - struct writequeue_entry *e = - list_entry(list, struct writequeue_entry, list); + list_for_each_entry_safe(e, safe, &con->writequeue, list) { list_del(&e->list); free_entry(e); } @@ -1369,14 +1388,7 @@ static void process_send_sockets(struct work_struct *work) /* Discard all entries on the write queues */ static void clean_writequeues(void) { - int nodeid; - - for (nodeid = 1; nodeid <= max_nodeid; nodeid++) { - struct connection *con = __nodeid2con(nodeid, 0); - - if (con) - clean_one_writequeue(con); - } + foreach_conn(clean_one_writequeue); } static void work_stop(void) @@ -1406,23 +1418,29 @@ static int work_start(void) return 0; } -void dlm_lowcomms_stop(void) +static void stop_conn(struct connection *con) { - int i; - struct connection *con; + con->flags |= 0x0F; + if (con->sock) + con->sock->sk->sk_user_data = NULL; +} +static void free_conn(struct connection *con) +{ + close_connection(con, true); + if (con->othercon) + kmem_cache_free(con_cache, con->othercon); + hlist_del(&con->list); + kmem_cache_free(con_cache, con); +} + +void dlm_lowcomms_stop(void) +{ /* Set all the flags to prevent any socket activity. */ mutex_lock(&connections_lock); - for (i = 0; i <= max_nodeid; i++) { - con = __nodeid2con(i, 0); - if (con) { - con->flags |= 0x0F; - if (con->sock) - con->sock->sk->sk_user_data = NULL; - } - } + foreach_conn(stop_conn); mutex_unlock(&connections_lock); work_stop(); @@ -1430,25 +1448,20 @@ void dlm_lowcomms_stop(void) mutex_lock(&connections_lock); clean_writequeues(); - for (i = 0; i <= max_nodeid; i++) { - con = __nodeid2con(i, 0); - if (con) { - close_connection(con, true); - if (con->othercon) - kmem_cache_free(con_cache, con->othercon); - kmem_cache_free(con_cache, con); - } - } - max_nodeid = 0; + foreach_conn(free_conn); + mutex_unlock(&connections_lock); kmem_cache_destroy(con_cache); - idr_init(&connections_idr); } int dlm_lowcomms_start(void) { int error = -EINVAL; struct connection *con; + int i; + + for (i = 0; i < CONN_HASH_SIZE; i++) + INIT_HLIST_HEAD(&connection_hash[i]); init_local(); if (!dlm_local_count) { -- cgit v1.2.3-18-g5258 From 43279e5376017c40b4be9af5bc79cbb4ef6f53d7 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 28 Jan 2009 14:37:54 -0600 Subject: dlm: clear defunct cancel state When a conversion completes successfully and finds that a cancel of the convert is still in progress (which is now a moot point), preemptively clear the state associated with outstanding cancel. That state could cause a subsequent conversion to be ignored. Also, improve the consistency and content of error and debug messages in this area. Signed-off-by: David Teigland --- fs/dlm/lock.c | 53 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 01e7d39c5fb..8cb92046a58 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -835,7 +835,7 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) lkb->lkb_wait_count++; hold_lkb(lkb); - log_debug(ls, "add overlap %x cur %d new %d count %d flags %x", + log_debug(ls, "addwait %x cur %d overlap %d count %d f %x", lkb->lkb_id, lkb->lkb_wait_type, mstype, lkb->lkb_wait_count, lkb->lkb_flags); goto out; @@ -851,7 +851,7 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); out: if (error) - log_error(ls, "add_to_waiters %x error %d flags %x %d %d %s", + log_error(ls, "addwait error %x %d flags %x %d %d %s", lkb->lkb_id, error, lkb->lkb_flags, mstype, lkb->lkb_wait_type, lkb->lkb_resource->res_name); mutex_unlock(&ls->ls_waiters_mutex); @@ -863,23 +863,55 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) request reply on the requestqueue) between dlm_recover_waiters_pre() which set RESEND and dlm_recover_waiters_post() */ -static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype) +static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, + struct dlm_message *ms) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; int overlap_done = 0; if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) { + log_debug(ls, "remwait %x unlock_reply overlap", lkb->lkb_id); lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; overlap_done = 1; goto out_del; } if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) { + log_debug(ls, "remwait %x cancel_reply overlap", lkb->lkb_id); lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; overlap_done = 1; goto out_del; } + /* Cancel state was preemptively cleared by a successful convert, + see next comment, nothing to do. */ + + if ((mstype == DLM_MSG_CANCEL_REPLY) && + (lkb->lkb_wait_type != DLM_MSG_CANCEL)) { + log_debug(ls, "remwait %x cancel_reply wait_type %d", + lkb->lkb_id, lkb->lkb_wait_type); + return -1; + } + + /* Remove for the convert reply, and premptively remove for the + cancel reply. A convert has been granted while there's still + an outstanding cancel on it (the cancel is moot and the result + in the cancel reply should be 0). We preempt the cancel reply + because the app gets the convert result and then can follow up + with another op, like convert. This subsequent op would see the + lingering state of the cancel and fail with -EBUSY. */ + + if ((mstype == DLM_MSG_CONVERT_REPLY) && + (lkb->lkb_wait_type == DLM_MSG_CONVERT) && + is_overlap_cancel(lkb) && ms && !ms->m_result) { + log_debug(ls, "remwait %x convert_reply zap overlap_cancel", + lkb->lkb_id); + lkb->lkb_wait_type = 0; + lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; + lkb->lkb_wait_count--; + goto out_del; + } + /* N.B. type of reply may not always correspond to type of original msg due to lookup->request optimization, verify others? */ @@ -888,8 +920,8 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype) goto out_del; } - log_error(ls, "remove_from_waiters lkid %x flags %x types %d %d", - lkb->lkb_id, lkb->lkb_flags, mstype, lkb->lkb_wait_type); + log_error(ls, "remwait error %x reply %d flags %x no wait_type", + lkb->lkb_id, mstype, lkb->lkb_flags); return -1; out_del: @@ -899,7 +931,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype) this would happen */ if (overlap_done && lkb->lkb_wait_type) { - log_error(ls, "remove_from_waiters %x reply %d give up on %d", + log_error(ls, "remwait error %x reply %d wait_type %d overlap", lkb->lkb_id, mstype, lkb->lkb_wait_type); lkb->lkb_wait_count--; lkb->lkb_wait_type = 0; @@ -921,7 +953,7 @@ static int remove_from_waiters(struct dlm_lkb *lkb, int mstype) int error; mutex_lock(&ls->ls_waiters_mutex); - error = _remove_from_waiters(lkb, mstype); + error = _remove_from_waiters(lkb, mstype, NULL); mutex_unlock(&ls->ls_waiters_mutex); return error; } @@ -936,7 +968,7 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms) if (ms != &ls->ls_stub_ms) mutex_lock(&ls->ls_waiters_mutex); - error = _remove_from_waiters(lkb, ms->m_type); + error = _remove_from_waiters(lkb, ms->m_type, ms); if (ms != &ls->ls_stub_ms) mutex_unlock(&ls->ls_waiters_mutex); return error; @@ -2083,6 +2115,11 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, lkb->lkb_timeout_cs = args->timeout; rv = 0; out: + if (rv) + log_debug(ls, "validate_lock_args %d %x %x %x %d %d %s", + rv, lkb->lkb_id, lkb->lkb_flags, args->flags, + lkb->lkb_status, lkb->lkb_wait_type, + lkb->lkb_resource->res_name); return rv; } -- cgit v1.2.3-18-g5258 From a536e38125fe5da8ed49690f30c30a8f651cf1f5 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 27 Feb 2009 15:23:28 -0600 Subject: dlm: ignore cancel on granted lock Return immediately from dlm_unlock(CANCEL) if the lock is granted and not being converted; there's nothing to cancel. Signed-off-by: David Teigland --- fs/dlm/lock.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 8cb92046a58..205ec95b347 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -2186,6 +2186,13 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) goto out; } + /* there's nothing to cancel */ + if (lkb->lkb_status == DLM_LKSTS_GRANTED && + !lkb->lkb_wait_type) { + rv = -EBUSY; + goto out; + } + switch (lkb->lkb_wait_type) { case DLM_MSG_LOOKUP: case DLM_MSG_REQUEST: -- cgit v1.2.3-18-g5258 From 1fecb1c4b62881e3689ba2dcf93072ae301b597c Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 4 Mar 2009 11:17:23 -0600 Subject: dlm: fix length calculation in compat code Using offsetof() to calculate name length does not work because it does not produce consistent results with with structure packing. This caused memcpy to corrupt memory by copying 4 extra bytes off the end of the buffer on 64 bit kernels with 32 bit userspace (the only case where this 32/64 compat code is used). The fix is to calculate name length directly from the start instead of trying to derive it later using count and offsetof. Signed-off-by: David Teigland --- fs/dlm/user.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 065149e84f4..ebce994ab0b 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. + * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -84,7 +84,7 @@ struct dlm_lock_result32 { static void compat_input(struct dlm_write_request *kb, struct dlm_write_request32 *kb32, - size_t count) + int namelen) { kb->version[0] = kb32->version[0]; kb->version[1] = kb32->version[1]; @@ -96,8 +96,7 @@ static void compat_input(struct dlm_write_request *kb, kb->cmd == DLM_USER_REMOVE_LOCKSPACE) { kb->i.lspace.flags = kb32->i.lspace.flags; kb->i.lspace.minor = kb32->i.lspace.minor; - memcpy(kb->i.lspace.name, kb32->i.lspace.name, count - - offsetof(struct dlm_write_request32, i.lspace.name)); + memcpy(kb->i.lspace.name, kb32->i.lspace.name, namelen); } else if (kb->cmd == DLM_USER_PURGE) { kb->i.purge.nodeid = kb32->i.purge.nodeid; kb->i.purge.pid = kb32->i.purge.pid; @@ -115,8 +114,7 @@ static void compat_input(struct dlm_write_request *kb, kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); - memcpy(kb->i.lock.name, kb32->i.lock.name, count - - offsetof(struct dlm_write_request32, i.lock.name)); + memcpy(kb->i.lock.name, kb32->i.lock.name, namelen); } } @@ -539,9 +537,16 @@ static ssize_t device_write(struct file *file, const char __user *buf, #ifdef CONFIG_COMPAT if (!kbuf->is64bit) { struct dlm_write_request32 *k32buf; + int namelen = 0; + + if (count > sizeof(struct dlm_write_request32)) + namelen = count - sizeof(struct dlm_write_request32); + k32buf = (struct dlm_write_request32 *)kbuf; - kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - - sizeof(struct dlm_write_request32)), GFP_KERNEL); + + /* add 1 after namelen so that the name string is terminated */ + kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1, + GFP_KERNEL); if (!kbuf) { kfree(k32buf); return -ENOMEM; @@ -549,7 +554,8 @@ static ssize_t device_write(struct file *file, const char __user *buf, if (proc) set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); - compat_input(kbuf, k32buf, count + 1); + + compat_input(kbuf, k32buf, namelen); kfree(k32buf); } #endif -- cgit v1.2.3-18-g5258