diff options
Diffstat (limited to 'net/rds')
38 files changed, 264 insertions, 397 deletions
diff --git a/net/rds/Kconfig b/net/rds/Kconfig index ec753b3ae72..f2c670ba7b9 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -1,7 +1,7 @@ config RDS - tristate "The RDS Protocol (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + tristate "The RDS Protocol" + depends on INET ---help--- The RDS (Reliable Datagram Sockets) protocol provides reliable, sequenced delivery of datagrams over Infiniband, iWARP, diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index bb6ad81b671..424ff622ab5 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -68,7 +68,6 @@ static int rds_release(struct socket *sock) { struct sock *sk = sock->sk; struct rds_sock *rs; - unsigned long flags; if (!sk) goto out; @@ -94,10 +93,10 @@ static int rds_release(struct socket *sock) rds_rdma_drop_keys(rs); rds_notify_queue_get(rs, NULL); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_del_init(&rs->rs_item); rds_sock_count--; - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); rds_trans_put(rs->rs_transport); @@ -409,7 +408,6 @@ static const struct proto_ops rds_proto_ops = { static int __rds_create(struct socket *sock, struct sock *sk, int protocol) { - unsigned long flags; struct rds_sock *rs; sock_init_data(sock, sk); @@ -426,10 +424,10 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) spin_lock_init(&rs->rs_rdma_lock); rs->rs_rdma_keys = RB_ROOT; - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_add_tail(&rs->rs_item, &rds_sock_list); rds_sock_count++; - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); return 0; } @@ -471,12 +469,11 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, { struct rds_sock *rs; struct rds_incoming *inc; - unsigned long flags; unsigned int total = 0; len /= sizeof(struct rds_info_message); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_for_each_entry(rs, &rds_sock_list, rs_item) { read_lock(&rs->rs_recv_lock); @@ -492,7 +489,7 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, read_unlock(&rs->rs_recv_lock); } - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); lens->nr = total; lens->each = sizeof(struct rds_info_message); @@ -504,11 +501,10 @@ static void rds_sock_info(struct socket *sock, unsigned int len, { struct rds_info_socket sinfo; struct rds_sock *rs; - unsigned long flags; len /= sizeof(struct rds_info_socket); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); if (len < rds_sock_count) goto out; @@ -529,7 +525,7 @@ out: lens->nr = rds_sock_count; lens->each = sizeof(struct rds_info_socket); - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); } static void rds_exit(void) diff --git a/net/rds/bind.c b/net/rds/bind.c index 2f6b3fcc79f..a2e6562da75 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -35,6 +35,7 @@ #include <linux/in.h> #include <linux/if_arp.h> #include <linux/jhash.h> +#include <linux/ratelimit.h> #include "rds.h" #define BIND_HASH_SIZE 1024 @@ -51,13 +52,12 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port, struct rds_sock *insert) { struct rds_sock *rs; - struct hlist_node *node; struct hlist_head *head = hash_to_bucket(addr, port); u64 cmp; u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); rcu_read_lock(); - hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) { + hlist_for_each_entry_rcu(rs, head, rs_bound_node) { cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | be16_to_cpu(rs->rs_bound_port); @@ -117,7 +117,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) rover = be16_to_cpu(*port); last = rover; } else { - rover = max_t(u16, net_random(), 2); + rover = max_t(u16, prandom_u32(), 2); last = rover - 1; } @@ -185,8 +185,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!trans) { ret = -EADDRNOTAVAIL; rds_remove_bound(rs); - if (printk_ratelimit()) - printk(KERN_INFO "RDS: rds_bind() could not find a transport, " + printk_ratelimited(KERN_INFO "RDS: rds_bind() could not find a transport, " "load rds_tcp or rds_rdma?\n"); goto out; } diff --git a/net/rds/cong.c b/net/rds/cong.c index 75ea686f27d..e5b65acd650 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -33,8 +33,8 @@ #include <linux/slab.h> #include <linux/types.h> #include <linux/rbtree.h> - -#include <asm-generic/bitops/le.h> +#include <linux/bitops.h> +#include <linux/export.h> #include "rds.h" @@ -285,7 +285,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - generic___set_le_bit(off, (void *)map->m_page_addrs[i]); + __set_bit_le(off, (void *)map->m_page_addrs[i]); } void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) @@ -299,7 +299,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - generic___clear_le_bit(off, (void *)map->m_page_addrs[i]); + __clear_bit_le(off, (void *)map->m_page_addrs[i]); } static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) @@ -310,7 +310,7 @@ static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - return generic_test_le_bit(off, (void *)map->m_page_addrs[i]); + return test_bit_le(off, (void *)map->m_page_addrs[i]); } void rds_cong_add_socket(struct rds_sock *rs) diff --git a/net/rds/connection.c b/net/rds/connection.c index 9334d892366..378c3a6acf8 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -33,6 +33,7 @@ #include <linux/kernel.h> #include <linux/list.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/inet_hashtables.h> #include "rds.h" @@ -50,10 +51,16 @@ static struct kmem_cache *rds_conn_slab; static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) { + static u32 rds_hash_secret __read_mostly; + + unsigned long hash; + + net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret)); + /* Pass NULL, don't need struct net for hash */ - unsigned long hash = inet_ehashfn(NULL, - be32_to_cpu(laddr), 0, - be32_to_cpu(faddr), 0); + hash = __inet_ehashfn(be32_to_cpu(laddr), 0, + be32_to_cpu(faddr), 0, + rds_hash_secret); return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; } @@ -68,9 +75,8 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head, struct rds_transport *trans) { struct rds_connection *conn, *ret = NULL; - struct hlist_node *pos; - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { if (conn->c_faddr == faddr && conn->c_laddr == laddr && conn->c_trans == trans) { ret = conn; @@ -375,7 +381,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, int want_send) { struct hlist_head *head; - struct hlist_node *pos; struct list_head *list; struct rds_connection *conn; struct rds_message *rm; @@ -389,7 +394,7 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); i++, head++) { - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { if (want_send) list = &conn->c_send_queue; else @@ -438,7 +443,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, { uint64_t buffer[(item_len + 7) / 8]; struct hlist_head *head; - struct hlist_node *pos; struct rds_connection *conn; size_t i; @@ -449,7 +453,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); i++, head++) { - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { /* XXX no c_lock usage.. */ if (!visitor(conn, buffer)) diff --git a/net/rds/ib.c b/net/rds/ib.c index 4123967d4d6..ba2dffeff60 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -38,6 +38,7 @@ #include <linux/if_arp.h> #include <linux/delay.h> #include <linux/slab.h> +#include <linux/module.h> #include "rds.h" #include "ib.h" @@ -325,7 +326,7 @@ static int rds_ib_laddr_check(__be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); @@ -337,7 +338,8 @@ static int rds_ib_laddr_check(__be32 addr) ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); /* due to this, we will claim to support iWARP devices unless we check node_type. */ - if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA) + if (ret || !cm_id->device || + cm_id->device->node_type != RDMA_NODE_IB_CA) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI4 ret %d node type %d\n", @@ -364,7 +366,6 @@ void rds_ib_exit(void) rds_ib_sysctl_exit(); rds_ib_recv_exit(); rds_trans_unregister(&rds_ib_transport); - rds_ib_fmr_exit(); } struct rds_transport rds_ib_transport = { @@ -400,13 +401,9 @@ int rds_ib_init(void) INIT_LIST_HEAD(&rds_ib_devices); - ret = rds_ib_fmr_init(); - if (ret) - goto out; - ret = ib_register_client(&rds_ib_client); if (ret) - goto out_fmr_exit; + goto out; ret = rds_ib_sysctl_init(); if (ret) @@ -430,8 +427,6 @@ out_sysctl: rds_ib_sysctl_exit(); out_ibreg: rds_ib_unregister_client(); -out_fmr_exit: - rds_ib_fmr_exit(); out: return ret; } diff --git a/net/rds/ib.h b/net/rds/ib.h index e34ad032b66..7280ab8810c 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -3,6 +3,7 @@ #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> +#include <linux/interrupt.h> #include <linux/pci.h> #include <linux/slab.h> #include "rds.h" @@ -49,7 +50,7 @@ struct rds_ib_cache_head { }; struct rds_ib_refill_cache { - struct rds_ib_cache_head *percpu; + struct rds_ib_cache_head __percpu *percpu; struct list_head *xfer; struct list_head *ready; }; @@ -185,8 +186,7 @@ struct rds_ib_device { struct work_struct free_work; }; -#define pcidev_to_node(pcidev) pcibus_to_node(pcidev->bus) -#define ibdev_to_node(ibdev) pcidev_to_node(to_pci_dev(ibdev->dma_device)) +#define ibdev_to_node(ibdev) dev_to_node(ibdev->dma_device) #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev) /* bits for i_ack_flags */ @@ -307,8 +307,6 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, void rds_ib_sync_mr(void *trans_private, int dir); void rds_ib_free_mr(void *trans_private, int invalidate); void rds_ib_flush_mrs(void); -int rds_ib_fmr_init(void); -void rds_ib_fmr_exit(void); /* ib_recv.c */ int rds_ib_recv_init(void); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index ee369d201a6..31b74f5e61a 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/ratelimit.h> #include "rds.h" #include "ib.h" @@ -374,23 +375,21 @@ static int rds_ib_setup_qp(struct rds_connection *conn) goto out; } - ic->i_sends = vmalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), + ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), ibdev_to_node(dev)); if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); goto out; } - memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); - ic->i_recvs = vmalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), + ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), ibdev_to_node(dev)); if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); goto out; } - memset(ic->i_recvs, 0, ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); rds_ib_recv_init_ack(ic); @@ -435,13 +434,11 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event) version = RDS_PROTOCOL_3_0; while ((common >>= 1) != 0) version++; - } else if (printk_ratelimit()) { - printk(KERN_NOTICE "RDS: Connection from %pI4 using " - "incompatible protocol version %u.%u\n", - &dp->dp_saddr, - dp->dp_protocol_major, - dp->dp_protocol_minor); - } + } else + printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n", + &dp->dp_saddr, + dp->dp_protocol_major, + dp->dp_protocol_minor); return version; } @@ -587,7 +584,7 @@ int rds_ib_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, - RDMA_PS_TCP); + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); ic->i_cm_id = NULL; @@ -751,7 +748,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) int ret; /* XXX too lazy? */ - ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL); + ic = kzalloc(sizeof(struct rds_ib_connection), gfp); if (!ic) return -ENOMEM; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 18a833c450c..e8fdb172adb 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -33,12 +33,10 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/rculist.h> +#include <linux/llist.h> #include "rds.h" #include "ib.h" -#include "xlist.h" - -static struct workqueue_struct *rds_ib_fmr_wq; static DEFINE_PER_CPU(unsigned long, clean_list_grace); #define CLEAN_LIST_BUSY_BIT 0 @@ -51,7 +49,7 @@ struct rds_ib_mr { struct rds_ib_mr_pool *pool; struct ib_fmr *fmr; - struct xlist_head xlist; + struct llist_node llnode; /* unmap_list is for freeing */ struct list_head unmap_list; @@ -73,9 +71,9 @@ struct rds_ib_mr_pool { atomic_t item_count; /* total # of MRs */ atomic_t dirty_count; /* # dirty of MRs */ - struct xlist_head drop_list; /* MRs that have reached their max_maps limit */ - struct xlist_head free_list; /* unused MRs */ - struct xlist_head clean_list; /* global unused & unamapped MRs */ + struct llist_head drop_list; /* MRs that have reached their max_maps limit */ + struct llist_head free_list; /* unused MRs */ + struct llist_head clean_list; /* global unused & unamapped MRs */ wait_queue_head_t flush_wait; atomic_t free_pinned; /* memory pinned by free MRs */ @@ -222,9 +220,9 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) if (!pool) return ERR_PTR(-ENOMEM); - INIT_XLIST_HEAD(&pool->free_list); - INIT_XLIST_HEAD(&pool->drop_list); - INIT_XLIST_HEAD(&pool->clean_list); + init_llist_head(&pool->free_list); + init_llist_head(&pool->drop_list); + init_llist_head(&pool->clean_list); mutex_init(&pool->flush_lock); init_waitqueue_head(&pool->flush_wait); INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); @@ -262,26 +260,18 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) kfree(pool); } -static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl, - struct rds_ib_mr **ibmr_ret) -{ - struct xlist_head *ibmr_xl; - ibmr_xl = xlist_del_head_fast(xl); - *ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist); -} - static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) { struct rds_ib_mr *ibmr = NULL; - struct xlist_head *ret; + struct llist_node *ret; unsigned long *flag; preempt_disable(); flag = &__get_cpu_var(clean_list_grace); set_bit(CLEAN_LIST_BUSY_BIT, flag); - ret = xlist_del_head(&pool->clean_list); + ret = llist_del_first(&pool->clean_list); if (ret) - ibmr = list_entry(ret, struct rds_ib_mr, xlist); + ibmr = llist_entry(ret, struct rds_ib_mr, llnode); clear_bit(CLEAN_LIST_BUSY_BIT, flag); preempt_enable(); @@ -307,7 +297,7 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) int err = 0, iter = 0; if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) - queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); while (1) { ibmr = rds_ib_reuse_fmr(pool); @@ -531,46 +521,44 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr } /* - * given an xlist of mrs, put them all into the list_head for more processing + * given an llist of mrs, put them all into the list_head for more processing */ -static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list) +static void llist_append_to_list(struct llist_head *llist, struct list_head *list) { struct rds_ib_mr *ibmr; - struct xlist_head splice; - struct xlist_head *cur; - struct xlist_head *next; - - splice.next = NULL; - xlist_splice(xlist, &splice); - cur = splice.next; - while (cur) { - next = cur->next; - ibmr = list_entry(cur, struct rds_ib_mr, xlist); + struct llist_node *node; + struct llist_node *next; + + node = llist_del_all(llist); + while (node) { + next = node->next; + ibmr = llist_entry(node, struct rds_ib_mr, llnode); list_add_tail(&ibmr->unmap_list, list); - cur = next; + node = next; } } /* - * this takes a list head of mrs and turns it into an xlist of clusters. - * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for - * reuse. + * this takes a list head of mrs and turns it into linked llist nodes + * of clusters. Each cluster has linked llist nodes of + * MR_CLUSTER_SIZE mrs that are ready for reuse. */ -static void list_append_to_xlist(struct rds_ib_mr_pool *pool, - struct list_head *list, struct xlist_head *xlist, - struct xlist_head **tail_ret) +static void list_to_llist_nodes(struct rds_ib_mr_pool *pool, + struct list_head *list, + struct llist_node **nodes_head, + struct llist_node **nodes_tail) { struct rds_ib_mr *ibmr; - struct xlist_head *cur_mr = xlist; - struct xlist_head *tail_mr = NULL; + struct llist_node *cur = NULL; + struct llist_node **next = nodes_head; list_for_each_entry(ibmr, list, unmap_list) { - tail_mr = &ibmr->xlist; - tail_mr->next = NULL; - cur_mr->next = tail_mr; - cur_mr = tail_mr; + cur = &ibmr->llnode; + *next = cur; + next = &cur->next; } - *tail_ret = tail_mr; + *next = NULL; + *nodes_tail = cur; } /* @@ -583,8 +571,8 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **ibmr_ret) { struct rds_ib_mr *ibmr, *next; - struct xlist_head clean_xlist; - struct xlist_head *clean_tail; + struct llist_node *clean_nodes; + struct llist_node *clean_tail; LIST_HEAD(unmap_list); LIST_HEAD(fmr_list); unsigned long unpinned = 0; @@ -605,7 +593,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, prepare_to_wait(&pool->flush_wait, &wait, TASK_UNINTERRUPTIBLE); - if (xlist_empty(&pool->clean_list)) + if (llist_empty(&pool->clean_list)) schedule(); ibmr = rds_ib_reuse_fmr(pool); @@ -630,10 +618,10 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, /* Get the list of all MRs to be dropped. Ordering matters - * we want to put drop_list ahead of free_list. */ - xlist_append_to_list(&pool->drop_list, &unmap_list); - xlist_append_to_list(&pool->free_list, &unmap_list); + llist_append_to_list(&pool->drop_list, &unmap_list); + llist_append_to_list(&pool->free_list, &unmap_list); if (free_all) - xlist_append_to_list(&pool->clean_list, &unmap_list); + llist_append_to_list(&pool->clean_list, &unmap_list); free_goal = rds_ib_flush_goal(pool, free_all); @@ -665,22 +653,22 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, if (!list_empty(&unmap_list)) { /* we have to make sure that none of the things we're about * to put on the clean list would race with other cpus trying - * to pull items off. The xlist would explode if we managed to + * to pull items off. The llist would explode if we managed to * remove something from the clean list and then add it back again - * while another CPU was spinning on that same item in xlist_del_head. + * while another CPU was spinning on that same item in llist_del_first. * - * This is pretty unlikely, but just in case wait for an xlist grace period + * This is pretty unlikely, but just in case wait for an llist grace period * here before adding anything back into the clean list. */ wait_clean_list_grace(); - list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail); + list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail); if (ibmr_ret) - refill_local(pool, &clean_xlist, ibmr_ret); + *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); - /* refill_local may have emptied our list */ - if (!xlist_empty(&clean_xlist)) - xlist_add(clean_xlist.next, clean_tail, &pool->clean_list); + /* more than one entry in llist nodes */ + if (clean_nodes->next) + llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list); } @@ -696,24 +684,6 @@ out_nolock: return ret; } -int rds_ib_fmr_init(void) -{ - rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd"); - if (!rds_ib_fmr_wq) - return -ENOMEM; - return 0; -} - -/* - * By the time this is called all the IB devices should have been torn down and - * had their pools freed. As each pool is freed its work struct is waited on, - * so the pool flushing work queue should be idle by the time we get here. - */ -void rds_ib_fmr_exit(void) -{ - destroy_workqueue(rds_ib_fmr_wq); -} - static void rds_ib_mr_pool_flush_worker(struct work_struct *work) { struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work); @@ -731,9 +701,9 @@ void rds_ib_free_mr(void *trans_private, int invalidate) /* Return it to the pool's free list */ if (ibmr->remap_count >= pool->fmr_attr.max_maps) - xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list); + llist_add(&ibmr->llnode, &pool->drop_list); else - xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list); + llist_add(&ibmr->llnode, &pool->free_list); atomic_add(ibmr->sg_len, &pool->free_pinned); atomic_inc(&pool->dirty_count); @@ -741,7 +711,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate) /* If we've pinned too many pages, request a flush */ if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || atomic_read(&pool->dirty_count) >= pool->max_items / 10) - queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); if (invalidate) { if (likely(!in_interrupt())) { @@ -749,8 +719,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate) } else { /* We get here if the user created a MR marked * as use_once and invalidate at the same time. */ - queue_delayed_work(rds_ib_fmr_wq, - &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); } } diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index e29e0ca32f7..d67de453c35 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -339,8 +339,8 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, sge->length = sizeof(struct rds_header); sge = &recv->r_sge[1]; - sge->addr = sg_dma_address(&recv->r_frag->f_sg); - sge->length = sg_dma_len(&recv->r_frag->f_sg); + sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg); + sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg); ret = 0; out: @@ -381,7 +381,10 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill) ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, recv->r_ibinc, sg_page(&recv->r_frag->f_sg), - (long) sg_dma_address(&recv->r_frag->f_sg), ret); + (long) ib_sg_dma_address( + ic->i_cm_id->device, + &recv->r_frag->f_sg), + ret); if (ret) { rds_ib_conn_error(conn, "recv post on " "%pI4 returned %d, disconnecting and " @@ -418,20 +421,20 @@ static void rds_ib_recv_cache_put(struct list_head *new_item, struct rds_ib_refill_cache *cache) { unsigned long flags; - struct rds_ib_cache_head *chp; - struct list_head *old; + struct list_head *old, *chpfirst; local_irq_save(flags); - chp = per_cpu_ptr(cache->percpu, smp_processor_id()); - if (!chp->first) + chpfirst = __this_cpu_read(cache->percpu->first); + if (!chpfirst) INIT_LIST_HEAD(new_item); else /* put on front */ - list_add_tail(new_item, chp->first); - chp->first = new_item; - chp->count++; + list_add_tail(new_item, chpfirst); - if (chp->count < RDS_IB_RECYCLE_BATCH_COUNT) + __this_cpu_write(cache->percpu->first, new_item); + __this_cpu_inc(cache->percpu->count); + + if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT) goto end; /* @@ -443,12 +446,13 @@ static void rds_ib_recv_cache_put(struct list_head *new_item, do { old = xchg(&cache->xfer, NULL); if (old) - list_splice_entire_tail(old, chp->first); - old = cmpxchg(&cache->xfer, NULL, chp->first); + list_splice_entire_tail(old, chpfirst); + old = cmpxchg(&cache->xfer, NULL, chpfirst); } while (old); - chp->first = NULL; - chp->count = 0; + + __this_cpu_write(cache->percpu->first, NULL); + __this_cpu_write(cache->percpu->count, 0); end: local_irq_restore(flags); } @@ -594,7 +598,7 @@ static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, { atomic64_set(&ic->i_ack_next, seq); if (ack_required) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); } } @@ -602,7 +606,7 @@ static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, static u64 rds_ib_get_ack(struct rds_ib_connection *ic) { clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); return atomic64_read(&ic->i_ack_next); } @@ -763,7 +767,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ - addr = kmap_atomic(sg_page(&frag->f_sg), KM_SOFTIRQ0); + addr = kmap_atomic(sg_page(&frag->f_sg)); src = addr + frag_off; dst = (void *)map->m_page_addrs[map_page] + map_off; @@ -773,7 +777,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, uncongested |= ~(*src) & *dst; *dst++ = *src++; } - kunmap_atomic(addr, KM_SOFTIRQ0); + kunmap_atomic(addr); copied += to_copy; @@ -826,7 +830,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, if (data_len < sizeof(struct rds_header)) { rds_ib_conn_error(conn, "incoming message " - "from %pI4 didn't inclue a " + "from %pI4 didn't include a " "header, disconnecting and " "reconnecting\n", &conn->c_faddr); @@ -919,8 +923,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, rds_ib_cong_recv(conn, ibinc); else { rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr, - &ibinc->ii_inc, GFP_ATOMIC, - KM_SOFTIRQ0); + &ibinc->ii_inc, GFP_ATOMIC); state->ack_next = be64_to_cpu(hdr->h_sequence); state->ack_next_valid = 1; } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 71f373c421b..1dde91e3dc7 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/device.h> #include <linux/dmapool.h> +#include <linux/ratelimit.h> #include "rds.h" #include "ib.h" @@ -207,8 +208,7 @@ static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic, } break; default: - if (printk_ratelimit()) - printk(KERN_NOTICE + printk_ratelimited(KERN_NOTICE "RDS/IB: %s: unexpected opcode 0x%x in WR!\n", __func__, send->s_wr.opcode); break; @@ -298,7 +298,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rds_ib_stats_inc(s_ib_tx_cq_event); if (wc.wr_id == RDS_IB_ACK_WR_ID) { - if (ic->i_ack_queued + HZ/2 < jiffies) + if (time_after(jiffies, ic->i_ack_queued + HZ/2)) rds_ib_stats_inc(s_ib_tx_stalled); rds_ib_ack_send_complete(ic); continue; @@ -315,7 +315,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rm = rds_ib_send_unmap_op(ic, send, wc.status); - if (send->s_queued + HZ/2 < jiffies) + if (time_after(jiffies, send->s_queued + HZ/2)) rds_ib_stats_inc(s_ib_tx_stalled); if (send->s_op) { @@ -355,7 +355,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) * * Conceptually, we have two counters: * - send credits: this tells us how many WRs we're allowed - * to submit without overruning the reciever's queue. For + * to submit without overruning the receiver's queue. For * each SEND WR we post, we decrement this by one. * * - posted credits: this tells us how many WRs we recently @@ -551,7 +551,9 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (conn->c_loopback && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); - return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + scat = &rm->data.op_sg[sg]; + ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length); + return sizeof(struct rds_header) + ret; } /* FIXME we may overallocate here */ diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c index 1253b006efd..e4e41b3afce 100644 --- a/net/rds/ib_sysctl.c +++ b/net/rds/ib_sysctl.c @@ -61,7 +61,7 @@ static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; */ unsigned int rds_ib_sysctl_flow_control = 0; -static ctl_table rds_ib_sysctl_table[] = { +static struct ctl_table rds_ib_sysctl_table[] = { { .procname = "max_send_wr", .data = &rds_ib_sysctl_max_send_wr, @@ -106,22 +106,15 @@ static ctl_table rds_ib_sysctl_table[] = { { } }; -static struct ctl_path rds_ib_sysctl_path[] = { - { .procname = "net", }, - { .procname = "rds", }, - { .procname = "ib", }, - { } -}; - void rds_ib_sysctl_exit(void) { if (rds_ib_sysctl_hdr) - unregister_sysctl_table(rds_ib_sysctl_hdr); + unregister_net_sysctl_table(rds_ib_sysctl_hdr); } int rds_ib_sysctl_init(void) { - rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table); + rds_ib_sysctl_hdr = register_net_sysctl(&init_net, "net/rds/ib", rds_ib_sysctl_table); if (!rds_ib_sysctl_hdr) return -ENOMEM; return 0; diff --git a/net/rds/info.c b/net/rds/info.c index 4fdf1b6e84f..9a6b4f66187 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -34,6 +34,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/proc_fs.h> +#include <linux/export.h> #include "rds.h" @@ -103,7 +104,7 @@ EXPORT_SYMBOL_GPL(rds_info_deregister_func); void rds_info_iter_unmap(struct rds_info_iterator *iter) { if (iter->addr) { - kunmap_atomic(iter->addr, KM_USER0); + kunmap_atomic(iter->addr); iter->addr = NULL; } } @@ -118,7 +119,7 @@ void rds_info_copy(struct rds_info_iterator *iter, void *data, while (bytes) { if (!iter->addr) - iter->addr = kmap_atomic(*iter->pages, KM_USER0); + iter->addr = kmap_atomic(*iter->pages); this = min(bytes, PAGE_SIZE - iter->offset); @@ -133,7 +134,7 @@ void rds_info_copy(struct rds_info_iterator *iter, void *data, iter->offset += this; if (iter->offset == PAGE_SIZE) { - kunmap_atomic(iter->addr, KM_USER0); + kunmap_atomic(iter->addr); iter->addr = NULL; iter->offset = 0; iter->pages++; diff --git a/net/rds/iw.c b/net/rds/iw.c index 5a9676fe594..589935661d6 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -38,6 +38,7 @@ #include <linux/if_arp.h> #include <linux/delay.h> #include <linux/slab.h> +#include <linux/module.h> #include "rds.h" #include "iw.h" @@ -226,7 +227,7 @@ static int rds_iw_laddr_check(__be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); @@ -238,7 +239,8 @@ static int rds_iw_laddr_check(__be32 addr) ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); /* due to this, we will claim to support IB devices unless we check node_type. */ - if (ret || cm_id->device->node_type != RDMA_NODE_RNIC) + if (ret || !cm_id->device || + cm_id->device->node_type != RDMA_NODE_RNIC) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI4 ret %d node type %d\n", diff --git a/net/rds/iw.h b/net/rds/iw.h index 90151922178..04ce3b193f7 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -1,6 +1,7 @@ #ifndef _RDS_IW_H #define _RDS_IW_H +#include <linux/interrupt.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> #include "rds.h" diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 712cf2d1f28..a91e1db62ee 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/ratelimit.h> #include "rds.h" #include "iw.h" @@ -181,7 +182,7 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr, unsigned int send_size, recv_size; int ret; - /* The offset of 1 is to accomodate the additional ACK WR. */ + /* The offset of 1 is to accommodate the additional ACK WR. */ send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1); recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1); rds_iw_ring_resize(send_ring, send_size - 1); @@ -258,8 +259,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn) */ rds_iwdev = ib_get_client_data(dev, &rds_iw_client); if (!rds_iwdev) { - if (printk_ratelimit()) - printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n", + printk_ratelimited(KERN_NOTICE "RDS/IW: No client_data for device %s\n", dev->name); return -EOPNOTSUPP; } @@ -365,13 +365,12 @@ static u32 rds_iw_protocol_compatible(const struct rds_iw_connect_private *dp) version = RDS_PROTOCOL_3_0; while ((common >>= 1) != 0) version++; - } else if (printk_ratelimit()) { - printk(KERN_NOTICE "RDS: Connection from %pI4 using " + } + printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using " "incompatible protocol version %u.%u\n", &dp->dp_saddr, dp->dp_protocol_major, dp->dp_protocol_minor); - } return version; } @@ -522,7 +521,7 @@ int rds_iw_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, - RDMA_PS_TCP); + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); ic->i_cm_id = NULL; @@ -695,7 +694,7 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) unsigned long flags; /* XXX too lazy? */ - ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL); + ic = kzalloc(sizeof(struct rds_iw_connection), gfp); if (!ic) return -ENOMEM; diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 59509e9a9e7..a817705ce2d 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -32,6 +32,7 @@ */ #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/ratelimit.h> #include "rds.h" #include "iw.h" @@ -83,7 +84,8 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, struct list_head *unmap_list, - struct list_head *kill_list); + struct list_head *kill_list, + int *unpinned); static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) @@ -122,7 +124,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd #else /* FIXME - needs to compare the local and remote * ipaddr/port tuple, but the ipaddr is the only - * available infomation in the rds_sock (as the rest are + * available information in the rds_sock (as the rest are * zero'ed. It doesn't appear to be properly populated * during connection setup... */ @@ -475,17 +477,6 @@ void rds_iw_sync_mr(void *trans_private, int direction) } } -static inline unsigned int rds_iw_flush_goal(struct rds_iw_mr_pool *pool, int free_all) -{ - unsigned int item_count; - - item_count = atomic_read(&pool->item_count); - if (free_all) - return item_count; - - return 0; -} - /* * Flush our pool of MRs. * At a minimum, all currently unused MRs are unmapped. @@ -498,7 +489,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) LIST_HEAD(unmap_list); LIST_HEAD(kill_list); unsigned long flags; - unsigned int nfreed = 0, ncleaned = 0, free_goal; + unsigned int nfreed = 0, ncleaned = 0, unpinned = 0; int ret = 0; rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); @@ -512,8 +503,6 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) list_splice_init(&pool->clean_list, &kill_list); spin_unlock_irqrestore(&pool->list_lock, flags); - free_goal = rds_iw_flush_goal(pool, free_all); - /* Batched invalidate of dirty MRs. * For FMR based MRs, the mappings on the unmap list are * actually members of an ibmr (ibmr->mapping). They either @@ -523,7 +512,8 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) * will be destroyed by the unmap function. */ if (!list_empty(&unmap_list)) { - ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list); + ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, + &kill_list, &unpinned); /* If we've been asked to destroy all MRs, move those * that were simply cleaned to the kill list */ if (free_all) @@ -547,6 +537,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) spin_unlock_irqrestore(&pool->list_lock, flags); } + atomic_sub(unpinned, &pool->free_pinned); atomic_sub(ncleaned, &pool->dirty_count); atomic_sub(nfreed, &pool->item_count); @@ -729,8 +720,8 @@ static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) failed_wr = &f_wr; ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr); BUG_ON(failed_wr != &f_wr); - if (ret && printk_ratelimit()) - printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", + if (ret) + printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", __func__, __LINE__, ret); return ret; } @@ -751,8 +742,8 @@ static int rds_iw_rdma_fastreg_inv(struct rds_iw_mr *ibmr) failed_wr = &s_wr; ret = ib_post_send(ibmr->cm_id->qp, &s_wr, &failed_wr); - if (ret && printk_ratelimit()) { - printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", + if (ret) { + printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", __func__, __LINE__, ret); goto out; } @@ -827,7 +818,8 @@ static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, struct list_head *unmap_list, - struct list_head *kill_list) + struct list_head *kill_list, + int *unpinned) { struct rds_iw_mapping *mapping, *next; unsigned int ncleaned = 0; @@ -854,6 +846,7 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, spin_lock_irqsave(&pool->list_lock, flags); list_for_each_entry_safe(mapping, next, unmap_list, m_list) { + *unpinned += mapping->m_sg.len; list_move(&mapping->m_list, &laundered); ncleaned++; } diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index 5e57347f49f..aa8bf678600 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -429,7 +429,7 @@ static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, { atomic64_set(&ic->i_ack_next, seq); if (ack_required) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); } } @@ -437,7 +437,7 @@ static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, static u64 rds_iw_get_ack(struct rds_iw_connection *ic) { clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); return atomic64_read(&ic->i_ack_next); } @@ -598,7 +598,7 @@ static void rds_iw_cong_recv(struct rds_connection *conn, to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ - addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); + addr = kmap_atomic(frag->f_page); src = addr + frag_off; dst = (void *)map->m_page_addrs[map_page] + map_off; @@ -608,7 +608,7 @@ static void rds_iw_cong_recv(struct rds_connection *conn, uncongested |= ~(*src) & *dst; *dst++ = *src++; } - kunmap_atomic(addr, KM_SOFTIRQ0); + kunmap_atomic(addr); copied += to_copy; @@ -661,7 +661,7 @@ static void rds_iw_process_recv(struct rds_connection *conn, if (byte_len < sizeof(struct rds_header)) { rds_iw_conn_error(conn, "incoming message " - "from %pI4 didn't inclue a " + "from %pI4 didn't include a " "header, disconnecting and " "reconnecting\n", &conn->c_faddr); @@ -754,8 +754,7 @@ static void rds_iw_process_recv(struct rds_connection *conn, rds_iw_cong_recv(conn, iwinc); else { rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr, - &iwinc->ii_inc, GFP_ATOMIC, - KM_SOFTIRQ0); + &iwinc->ii_inc, GFP_ATOMIC); state->ack_next = be64_to_cpu(hdr->h_sequence); state->ack_next_valid = 1; } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 6280ea020d4..9105ea03aec 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/device.h> #include <linux/dmapool.h> +#include <linux/ratelimit.h> #include "rds.h" #include "iw.h" @@ -231,7 +232,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) } if (wc.wr_id == RDS_IW_ACK_WR_ID) { - if (ic->i_ack_queued + HZ/2 < jiffies) + if (time_after(jiffies, ic->i_ack_queued + HZ/2)) rds_iw_stats_inc(s_iw_tx_stalled); rds_iw_ack_send_complete(ic); continue; @@ -258,8 +259,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) * when the SEND completes. */ break; default: - if (printk_ratelimit()) - printk(KERN_NOTICE + printk_ratelimited(KERN_NOTICE "RDS/IW: %s: unexpected opcode 0x%x in WR!\n", __func__, send->s_wr.opcode); break; @@ -267,7 +267,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) send->s_wr.opcode = 0xdead; send->s_wr.num_sge = 1; - if (send->s_queued + HZ/2 < jiffies) + if (time_after(jiffies, send->s_queued + HZ/2)) rds_iw_stats_inc(s_iw_tx_stalled); /* If a RDMA operation produced an error, signal this right @@ -307,7 +307,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) * * Conceptually, we have two counters: * - send credits: this tells us how many WRs we're allowed - * to submit without overruning the reciever's queue. For + * to submit without overruning the receiver's queue. For * each SEND WR we post, we decrement this by one. * * - posted credits: this tells us how many WRs we recently diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c index e2e47176e72..139239d2cb2 100644 --- a/net/rds/iw_sysctl.c +++ b/net/rds/iw_sysctl.c @@ -55,7 +55,7 @@ static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL; unsigned int rds_iw_sysctl_flow_control = 1; -static ctl_table rds_iw_sysctl_table[] = { +static struct ctl_table rds_iw_sysctl_table[] = { { .procname = "max_send_wr", .data = &rds_iw_sysctl_max_send_wr, @@ -109,22 +109,14 @@ static ctl_table rds_iw_sysctl_table[] = { { } }; -static struct ctl_path rds_iw_sysctl_path[] = { - { .procname = "net", }, - { .procname = "rds", }, - { .procname = "iw", }, - { } -}; - void rds_iw_sysctl_exit(void) { - if (rds_iw_sysctl_hdr) - unregister_sysctl_table(rds_iw_sysctl_hdr); + unregister_net_sysctl_table(rds_iw_sysctl_hdr); } int rds_iw_sysctl_init(void) { - rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table); + rds_iw_sysctl_hdr = register_net_sysctl(&init_net, "net/rds/iw", rds_iw_sysctl_table); if (!rds_iw_sysctl_hdr) return -ENOMEM; return 0; diff --git a/net/rds/loop.c b/net/rds/loop.c index aeec1d483b1..6b12b68541a 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off) { + struct scatterlist *sgp = &rm->data.op_sg[sg]; + int ret = sizeof(struct rds_header) + + be32_to_cpu(rm->m_inc.i_hdr.h_len); + /* Do not send cong updates to loopback */ if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); - return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off); + goto out; } BUG_ON(hdr_off || sg || off); @@ -74,14 +79,14 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, rds_message_addref(rm); rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc, - GFP_KERNEL, KM_USER0); + GFP_KERNEL); rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence), NULL); rds_inc_put(&rm->m_inc); - - return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); +out: + return ret; } /* @@ -116,7 +121,7 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp) struct rds_loop_connection *lc; unsigned long flags; - lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL); + lc = kzalloc(sizeof(struct rds_loop_connection), gfp); if (!lc) return -ENOMEM; diff --git a/net/rds/message.c b/net/rds/message.c index 1fd3d29023d..aba232f9f30 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -32,6 +32,7 @@ */ #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/export.h> #include "rds.h" @@ -81,10 +82,7 @@ static void rds_message_purge(struct rds_message *rm) void rds_message_put(struct rds_message *rm) { rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); - if (atomic_read(&rm->m_refcount) == 0) { -printk(KERN_CRIT "danger refcount zero on %p\n", rm); -WARN_ON(1); - } + WARN(!atomic_read(&rm->m_refcount), "danger refcount zero on %p\n", rm); if (atomic_dec_and_test(&rm->m_refcount)) { BUG_ON(!list_empty(&rm->m_sock_item)); BUG_ON(!list_empty(&rm->m_conn_item)); @@ -196,6 +194,9 @@ struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp) { struct rds_message *rm; + if (extra_len > KMALLOC_MAX_SIZE - sizeof(struct rds_message)) + return NULL; + rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp); if (!rm) goto out; diff --git a/net/rds/page.c b/net/rds/page.c index d8acdebe3c7..9005a2c920e 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -32,6 +32,8 @@ */ #include <linux/highmem.h> #include <linux/gfp.h> +#include <linux/cpu.h> +#include <linux/export.h> #include "rds.h" @@ -72,11 +74,12 @@ int rds_page_copy_user(struct page *page, unsigned long offset, } EXPORT_SYMBOL_GPL(rds_page_copy_user); -/* - * Message allocation uses this to build up regions of a message. +/** + * rds_page_remainder_alloc - build up regions of a message. * - * @bytes - the number of bytes needed. - * @gfp - the waiting behaviour of the allocation + * @scat: Scatter list for message + * @bytes: the number of bytes needed. + * @gfp: the waiting behaviour of the allocation * * @gfp is always ored with __GFP_HIGHMEM. Callers must be prepared to * kmap the pages, etc. diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 4195a053982..6cd9d1deafc 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -30,6 +30,7 @@ * SOFTWARE. * */ +#include <linux/module.h> #include <rdma/rdma_cm.h> #include "rdma_transport.h" @@ -158,7 +159,8 @@ static int rds_rdma_listen_init(void) struct rdma_cm_id *cm_id; int ret; - cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); printk(KERN_ERR "RDS/RDMA: failed to setup listener, " @@ -166,7 +168,7 @@ static int rds_rdma_listen_init(void) return ret; } - sin.sin_family = AF_INET, + sin.sin_family = AF_INET; sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); sin.sin_port = (__force u16)htons(RDS_PORT); diff --git a/net/rds/rds.h b/net/rds/rds.h index 9542449c072..48f8ffc60f8 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -36,8 +36,8 @@ #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) #else /* sigh, pr_debug() causes unused variable warnings */ -static inline void __attribute__ ((format (printf, 1, 2))) -rdsdebug(char *fmt, ...) +static inline __printf(1, 2) +void rdsdebug(char *fmt, ...) { } #endif @@ -50,7 +50,6 @@ rdsdebug(char *fmt, ...) #define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) #define RDS_CONG_MAP_BYTES (65536 / 8) -#define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long)) #define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) #define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) @@ -626,8 +625,8 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), size_t item_len); -void __rds_conn_error(struct rds_connection *conn, const char *, ...) - __attribute__ ((format (printf, 2, 3))); +__printf(2, 3) +void __rds_conn_error(struct rds_connection *conn, const char *, ...); #define rds_conn_error(conn, fmt...) \ __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) @@ -705,7 +704,7 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, __be32 saddr); void rds_inc_put(struct rds_incoming *inc); void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, - struct rds_incoming *inc, gfp_t gfp, enum km_type km); + struct rds_incoming *inc, gfp_t gfp); int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int msg_flags); void rds_clear_recv_queue(struct rds_sock *rs); @@ -750,7 +749,7 @@ void rds_atomic_send_complete(struct rds_message *rm, int wc_status); int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); -extern void __rds_put_mr_final(struct rds_mr *mr); +void __rds_put_mr_final(struct rds_mr *mr); static inline void rds_mr_put(struct rds_mr *mr) { if (atomic_dec_and_test(&mr->r_refcount)) diff --git a/net/rds/recv.c b/net/rds/recv.c index 596689e5927..bd82522534f 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -34,6 +34,7 @@ #include <linux/slab.h> #include <net/sock.h> #include <linux/in.h> +#include <linux/export.h> #include "rds.h" @@ -154,7 +155,7 @@ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock * tell us which roles the addrs in the conn are playing for this message. */ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, - struct rds_incoming *inc, gfp_t gfp, enum km_type km) + struct rds_incoming *inc, gfp_t gfp) { struct rds_sock *rs = NULL; struct sock *sk; @@ -401,7 +402,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct rds_sock *rs = rds_sk_to_rs(sk); long timeo; int ret = 0, nonblock = msg_flags & MSG_DONTWAIT; - struct sockaddr_in *sin; + DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct rds_incoming *inc = NULL; /* udp_recvmsg()->sock_recvtimeo() gets away without locking too.. */ @@ -478,12 +479,12 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rds_stats_inc(s_recv_delivered); - sin = (struct sockaddr_in *)msg->msg_name; if (sin) { sin->sin_family = AF_INET; sin->sin_port = inc->i_hdr.h_sport; sin->sin_addr.s_addr = inc->i_saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + msg->msg_namelen = sizeof(*sin); } break; } diff --git a/net/rds/send.c b/net/rds/send.c index 35b9c2e9caf..23718160d71 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -31,10 +31,13 @@ * */ #include <linux/kernel.h> +#include <linux/moduleparam.h> #include <linux/gfp.h> #include <net/sock.h> #include <linux/in.h> #include <linux/list.h> +#include <linux/ratelimit.h> +#include <linux/export.h> #include "rds.h" @@ -104,7 +107,7 @@ static int acquire_in_xmit(struct rds_connection *conn) static void release_in_xmit(struct rds_connection *conn) { clear_bit(RDS_IN_XMIT, &conn->c_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); /* * We don't use wait_on_bit()/wake_up_bit() because our waking is in a * hot path and finding waiters is very rare. We don't want to walk @@ -116,7 +119,7 @@ static void release_in_xmit(struct rds_connection *conn) } /* - * We're making the concious trade-off here to only send one message + * We're making the conscious trade-off here to only send one message * down the connection at a time. * Pro: * - tx queueing is a simple fifo list @@ -658,7 +661,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack, /* order flag updates with spin locks */ if (!list_empty(&list)) - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); spin_unlock_irqrestore(&conn->c_lock, flags); @@ -688,7 +691,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) } /* order flag updates with the rs lock */ - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); spin_unlock_irqrestore(&rs->rs_lock, flags); @@ -919,7 +922,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); - struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); __be32 daddr; __be16 dport; struct rds_message *rm = NULL; @@ -932,7 +935,6 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) { - printk(KERN_INFO "msg_flags 0x%08X\n", msg->msg_flags); ret = -EOPNOTSUPP; goto out; } @@ -1006,16 +1008,14 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out; if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { - if (printk_ratelimit()) - printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", + printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", &rm->rdma, conn->c_trans->xmit_rdma); ret = -EOPNOTSUPP; goto out; } if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) { - if (printk_ratelimit()) - printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n", + printk_ratelimited(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n", &rm->atomic, conn->c_trans->xmit_atomic); ret = -EOPNOTSUPP; goto out; @@ -1122,7 +1122,7 @@ rds_send_pong(struct rds_connection *conn, __be16 dport) rds_stats_inc(s_send_pong); if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) - rds_send_xmit(conn); + queue_delayed_work(rds_wq, &conn->c_send_w, 0); rds_message_put(rm); return 0; diff --git a/net/rds/stats.c b/net/rds/stats.c index 10c759ccac0..73be187d389 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -33,6 +33,7 @@ #include <linux/percpu.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> +#include <linux/export.h> #include "rds.h" @@ -86,6 +87,7 @@ void rds_stats_info_copy(struct rds_info_iterator *iter, for (i = 0; i < nr; i++) { BUG_ON(strlen(names[i]) >= sizeof(ctr.name)); strncpy(ctr.name, names[i], sizeof(ctr.name) - 1); + ctr.name[sizeof(ctr.name) - 1] = '\0'; ctr.value = values[i]; rds_info_copy(iter, &ctr, sizeof(ctr)); diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index 25ad0c77a26..c3b0cd43eb5 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -49,7 +49,7 @@ unsigned int rds_sysctl_max_unacked_bytes = (16 << 20); unsigned int rds_sysctl_ping_enable = 1; -static ctl_table rds_sysctl_rds_table[] = { +static struct ctl_table rds_sysctl_rds_table[] = { { .procname = "reconnect_min_delay_ms", .data = &rds_sysctl_reconnect_min_jiffies, @@ -92,17 +92,9 @@ static ctl_table rds_sysctl_rds_table[] = { { } }; -static struct ctl_path rds_sysctl_path[] = { - { .procname = "net", }, - { .procname = "rds", }, - { } -}; - - void rds_sysctl_exit(void) { - if (rds_sysctl_reg_table) - unregister_sysctl_table(rds_sysctl_reg_table); + unregister_net_sysctl_table(rds_sysctl_reg_table); } int rds_sysctl_init(void) @@ -110,7 +102,7 @@ int rds_sysctl_init(void) rds_sysctl_reconnect_min = msecs_to_jiffies(1); rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; - rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table); + rds_sysctl_reg_table = register_net_sysctl(&init_net,"net/rds", rds_sysctl_rds_table); if (!rds_sysctl_reg_table) return -ENOMEM; return 0; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 8e0a32001c9..edac9ef2bc8 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -33,6 +33,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/in.h> +#include <linux/module.h> #include <net/tcp.h> #include "rds.h" diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 9cf2927d002..65637491f72 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -61,12 +61,12 @@ void rds_tcp_state_change(struct sock *sk); /* tcp_listen.c */ int rds_tcp_listen_init(void); void rds_tcp_listen_stop(void); -void rds_tcp_listen_data_ready(struct sock *sk, int bytes); +void rds_tcp_listen_data_ready(struct sock *sk); /* tcp_recv.c */ int rds_tcp_recv_init(void); void rds_tcp_recv_exit(void); -void rds_tcp_data_ready(struct sock *sk, int bytes); +void rds_tcp_data_ready(struct sock *sk); int rds_tcp_recv(struct rds_connection *conn); void rds_tcp_inc_free(struct rds_incoming *inc); int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index af95c8e058f..a65ee78db0c 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { state_change = sk->sk_state_change; @@ -68,7 +68,7 @@ void rds_tcp_state_change(struct sock *sk) break; } out: - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); state_change(sk); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 8b5cc4aa886..23ab4dcd1d9 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -108,13 +108,13 @@ static void rds_tcp_accept_worker(struct work_struct *work) cond_resched(); } -void rds_tcp_listen_data_ready(struct sock *sk, int bytes) +void rds_tcp_listen_data_ready(struct sock *sk) { - void (*ready)(struct sock *sk, int bytes); + void (*ready)(struct sock *sk); rdsdebug("listen data ready sk %p\n", sk); - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); ready = sk->sk_user_data; if (!ready) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -131,8 +131,8 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) queue_work(rds_wq, &rds_tcp_listen_work); out: - read_unlock_bh(&sk->sk_callback_lock); - ready(sk, bytes); + read_unlock(&sk->sk_callback_lock); + ready(sk); } int rds_tcp_listen_init(void) @@ -145,7 +145,7 @@ int rds_tcp_listen_init(void) if (ret < 0) goto out; - sock->sk->sk_reuse = 1; + sock->sk->sk_reuse = SK_CAN_REUSE; rds_tcp_nonagle(sock); write_lock_bh(&sock->sk->sk_callback_lock); @@ -153,7 +153,7 @@ int rds_tcp_listen_init(void) sock->sk->sk_data_ready = rds_tcp_listen_data_ready; write_unlock_bh(&sock->sk->sk_callback_lock); - sin.sin_family = PF_INET, + sin.sin_family = PF_INET; sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); sin.sin_port = (__force u16)htons(RDS_TCP_PORT); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 78205e25500..9ae6e0a264e 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -169,7 +169,6 @@ static void rds_tcp_cong_recv(struct rds_connection *conn, struct rds_tcp_desc_arg { struct rds_connection *conn; gfp_t gfp; - enum km_type km; }; static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, @@ -255,7 +254,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, else rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr, &tinc->ti_inc, - arg->gfp, arg->km); + arg->gfp); tc->t_tinc_hdr_rem = sizeof(struct rds_header); tc->t_tinc_data_rem = 0; @@ -272,8 +271,7 @@ out: } /* the caller has to hold the sock lock */ -static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp, - enum km_type km) +static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp) { struct rds_tcp_connection *tc = conn->c_transport_data; struct socket *sock = tc->t_sock; @@ -283,7 +281,6 @@ static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp, /* It's like glib in the kernel! */ arg.conn = conn; arg.gfp = gfp; - arg.km = km; desc.arg.data = &arg; desc.error = 0; desc.count = 1; /* give more than one skb per call */ @@ -311,21 +308,21 @@ int rds_tcp_recv(struct rds_connection *conn) rdsdebug("recv worker conn %p tc %p sock %p\n", conn, tc, sock); lock_sock(sock->sk); - ret = rds_tcp_read_sock(conn, GFP_KERNEL, KM_USER0); + ret = rds_tcp_read_sock(conn, GFP_KERNEL); release_sock(sock->sk); return ret; } -void rds_tcp_data_ready(struct sock *sk, int bytes) +void rds_tcp_data_ready(struct sock *sk) { - void (*ready)(struct sock *sk, int bytes); + void (*ready)(struct sock *sk); struct rds_connection *conn; struct rds_tcp_connection *tc; - rdsdebug("data ready sk %p bytes %d\n", sk, bytes); + rdsdebug("data ready sk %p\n", sk); - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -336,11 +333,11 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) ready = tc->t_orig_data_ready; rds_tcp_stats_inc(s_tcp_data_ready_calls); - if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM) + if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM) queue_delayed_work(rds_wq, &conn->c_recv_w, 0); out: - read_unlock_bh(&sk->sk_callback_lock); - ready(sk, bytes); + read_unlock(&sk->sk_callback_lock); + ready(sk); } int rds_tcp_recv_init(void) diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 1b4fd68f0c7..53b17ca0dff 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -93,7 +93,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, rm->m_ack_seq = tc->t_last_sent_nxt + sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1; - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags); tc->t_last_expected_una = rm->m_ack_seq + 1; @@ -174,7 +174,7 @@ void rds_tcp_write_space(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { write_space = sk->sk_write_space; @@ -194,7 +194,7 @@ void rds_tcp_write_space(struct sock *sk) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); /* * write_space is only called when data leaves tcp's send queue if diff --git a/net/rds/tcp_stats.c b/net/rds/tcp_stats.c index d5898d03cd6..f8a7954f1f5 100644 --- a/net/rds/tcp_stats.c +++ b/net/rds/tcp_stats.c @@ -40,7 +40,7 @@ DEFINE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats) ____cacheline_aligned; -static const char const *rds_tcp_stat_names[] = { +static const char * const rds_tcp_stat_names[] = { "tcp_data_ready_calls", "tcp_write_space_calls", "tcp_sndbuf_full", diff --git a/net/rds/threads.c b/net/rds/threads.c index 0fd90f8c5f5..65eaefcab24 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -32,6 +32,7 @@ */ #include <linux/kernel.h> #include <linux/random.h> +#include <linux/export.h> #include "rds.h" diff --git a/net/rds/xlist.h b/net/rds/xlist.h deleted file mode 100644 index e6b5190dadd..00000000000 --- a/net/rds/xlist.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef _LINUX_XLIST_H -#define _LINUX_XLIST_H - -#include <linux/stddef.h> -#include <linux/poison.h> -#include <linux/prefetch.h> -#include <asm/system.h> - -struct xlist_head { - struct xlist_head *next; -}; - -static inline void INIT_XLIST_HEAD(struct xlist_head *list) -{ - list->next = NULL; -} - -static inline int xlist_empty(struct xlist_head *head) -{ - return head->next == NULL; -} - -static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail, - struct xlist_head *head) -{ - struct xlist_head *cur; - struct xlist_head *check; - - while (1) { - cur = head->next; - tail->next = cur; - check = cmpxchg(&head->next, cur, new); - if (check == cur) - break; - } -} - -static inline struct xlist_head *xlist_del_head(struct xlist_head *head) -{ - struct xlist_head *cur; - struct xlist_head *check; - struct xlist_head *next; - - while (1) { - cur = head->next; - if (!cur) - goto out; - - next = cur->next; - check = cmpxchg(&head->next, cur, next); - if (check == cur) - goto out; - } -out: - return cur; -} - -static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head) -{ - struct xlist_head *cur; - - cur = head->next; - if (!cur) - return NULL; - - head->next = cur->next; - return cur; -} - -static inline void xlist_splice(struct xlist_head *list, - struct xlist_head *head) -{ - struct xlist_head *cur; - - WARN_ON(head->next); - cur = xchg(&list->next, NULL); - head->next = cur; -} - -#endif |
