diff options
Diffstat (limited to 'drivers/infiniband/hw/mthca/mthca_srq.c')
| -rw-r--r-- | drivers/infiniband/hw/mthca/mthca_srq.c | 267 |
1 files changed, 166 insertions, 101 deletions
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 26d5161fde0..d22f970480c 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -28,12 +28,13 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $ */ #include <linux/slab.h> #include <linux/string.h> +#include <linux/sched.h> + +#include <asm/io.h> #include "mthca_dev.h" #include "mthca_cmd.h" @@ -49,7 +50,8 @@ struct mthca_tavor_srq_context { __be32 state_pd; __be32 lkey; __be32 uar; - __be32 wqe_cnt; + __be16 limit_watermark; + __be16 wqe_cnt; u32 reserved[2]; }; @@ -113,11 +115,16 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev, struct mthca_srq *srq, struct mthca_arbel_srq_context *context) { - int logsize; + int logsize, max; memset(context, 0, sizeof *context); - logsize = long_log2(srq->max) + srq->wqe_shift; + /* + * Put max in a temporary variable to work around gcc bug + * triggered by ilog2() on sparc64. + */ + max = srq->max; + logsize = ilog2(max); context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn); context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); context->db_index = cpu_to_be32(srq->db_index); @@ -166,9 +173,17 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, * scatter list L_Keys to the sentry value of 0x100. */ for (i = 0; i < srq->max; ++i) { - wqe = get_wqe(srq, i); + struct mthca_next_seg *next; - *wqe_to_link(wqe) = i < srq->max - 1 ? i + 1 : -1; + next = wqe = get_wqe(srq, i); + + if (i < srq->max - 1) { + *wqe_to_link(wqe) = i + 1; + next->nda_op = htonl(((i + 1) << srq->wqe_shift) | 1); + } else { + *wqe_to_link(wqe) = -1; + next->nda_op = 0; + } for (scatter = wqe + sizeof (struct mthca_next_seg); (void *) scatter < wqe + (1 << srq->wqe_shift); @@ -185,13 +200,12 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, struct ib_srq_attr *attr, struct mthca_srq *srq) { struct mthca_mailbox *mailbox; - u8 status; int ds; int err; /* Sanity check SRQ size before proceeding */ if (attr->max_wr > dev->limits.max_srq_wqes || - attr->max_sge > dev->limits.max_sg) + attr->max_sge > dev->limits.max_srq_sge) return -EINVAL; srq->max = attr->max_wr; @@ -200,11 +214,17 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (mthca_is_memfree(dev)) srq->max = roundup_pow_of_two(srq->max + 1); + else + srq->max = srq->max + 1; - ds = min(64UL, + ds = max(64UL, roundup_pow_of_two(sizeof (struct mthca_next_seg) + srq->max_gs * sizeof (struct mthca_data_seg))); - srq->wqe_shift = long_log2(ds); + + if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz)) + return -EINVAL; + + srq->wqe_shift = ilog2(ds); srq->srqn = mthca_alloc(&dev->srq_table.alloc); if (srq->srqn == -1) @@ -236,26 +256,21 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, goto err_out_mailbox; spin_lock_init(&srq->lock); - atomic_set(&srq->refcount, 1); + srq->refcount = 1; init_waitqueue_head(&srq->wait); + mutex_init(&srq->mutex); if (mthca_is_memfree(dev)) mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); else mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); - err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status); + err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn); if (err) { mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err); goto err_out_free_buf; } - if (status) { - mthca_warn(dev, "SW2HW_SRQ returned status 0x%02x\n", - status); - err = -EINVAL; - goto err_out_free_buf; - } spin_lock_irq(&dev->srq_table.lock); if (mthca_array_set(&dev->srq_table.srq, @@ -271,14 +286,15 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, srq->first_free = 0; srq->last_free = srq->max - 1; + attr->max_wr = srq->max - 1; + attr->max_sge = srq->max_gs; + return 0; err_out_free_srq: - err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn); if (err) mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); - else if (status) - mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); err_out_free_buf: if (!pd->ibpd.uobject) @@ -300,11 +316,21 @@ err_out: return err; } +static inline int get_srq_refcount(struct mthca_dev *dev, struct mthca_srq *srq) +{ + int c; + + spin_lock_irq(&dev->srq_table.lock); + c = srq->refcount; + spin_unlock_irq(&dev->srq_table.lock); + + return c; +} + void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) { struct mthca_mailbox *mailbox; int err; - u8 status; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) { @@ -312,19 +338,17 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) return; } - err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn); if (err) mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); - else if (status) - mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); spin_lock_irq(&dev->srq_table.lock); mthca_array_clear(&dev->srq_table.srq, srq->srqn & (dev->limits.num_srqs - 1)); + --srq->refcount; spin_unlock_irq(&dev->srq_table.lock); - atomic_dec(&srq->refcount); - wait_event(srq->wait, !atomic_read(&srq->refcount)); + wait_event(srq->wait, !get_srq_refcount(dev, srq)); if (!srq->ibsrq.uobject) { mthca_free_srq_buf(dev, srq); @@ -338,26 +362,61 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) } int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask) -{ + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); - int ret; - u8 status; + int ret = 0; /* We don't support resizing SRQs (yet?) */ if (attr_mask & IB_SRQ_MAX_WR) return -EINVAL; if (attr_mask & IB_SRQ_LIMIT) { - ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); - if (ret) - return ret; - if (status) + u32 max_wr = mthca_is_memfree(dev) ? srq->max - 1 : srq->max; + if (attr->srq_limit > max_wr) return -EINVAL; + + mutex_lock(&srq->mutex); + ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit); + mutex_unlock(&srq->mutex); } - return 0; + return ret; +} + +int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + struct mthca_mailbox *mailbox; + struct mthca_arbel_srq_context *arbel_ctx; + struct mthca_tavor_srq_context *tavor_ctx; + int err; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox); + if (err) + goto out; + + if (mthca_is_memfree(dev)) { + arbel_ctx = mailbox->buf; + srq_attr->srq_limit = be16_to_cpu(arbel_ctx->limit_watermark); + } else { + tavor_ctx = mailbox->buf; + srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark); + } + + srq_attr->max_wr = srq->max - 1; + srq_attr->max_sge = srq->max_gs; + +out: + mthca_free_mailbox(dev, mailbox); + + return err; } void mthca_srq_event(struct mthca_dev *dev, u32 srqn, @@ -369,7 +428,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn, spin_lock(&dev->srq_table.lock); srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1)); if (srq) - atomic_inc(&srq->refcount); + ++srq->refcount; spin_unlock(&dev->srq_table.lock); if (!srq) { @@ -386,8 +445,10 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn, srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); out: - if (atomic_dec_and_test(&srq->refcount)) + spin_lock(&dev->srq_table.lock); + if (!--srq->refcount) wake_up(&srq->wait); + spin_unlock(&dev->srq_table.lock); } /* @@ -396,16 +457,15 @@ out: void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr) { int ind; + struct mthca_next_seg *last_free; ind = wqe_addr >> srq->wqe_shift; spin_lock(&srq->lock); - if (likely(srq->first_free >= 0)) - *wqe_to_link(get_wqe(srq, srq->last_free)) = ind; - else - srq->first_free = ind; - + last_free = get_wqe(srq, srq->last_free); + *wqe_to_link(last_free) = ind; + last_free->nda_op = htonl((ind << srq->wqe_shift) | 1); *wqe_to_link(get_wqe(srq, ind)) = -1; srq->last_free = ind; @@ -431,20 +491,12 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, first_ind = srq->first_free; - for (nreq = 0; wr; ++nreq, wr = wr->next) { - ind = srq->first_free; - - if (ind < 0) { - mthca_err(dev, "SRQ %06x full\n", srq->srqn); - err = -ENOMEM; - *bad_wr = wr; - break; - } - + for (nreq = 0; wr; wr = wr->next) { + ind = srq->first_free; wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); - if (next_ind < 0) { + if (unlikely(next_ind < 0)) { mthca_err(dev, "SRQ %06x full\n", srq->srqn); err = -ENOMEM; *bad_wr = wr; @@ -454,7 +506,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, prev_wqe = srq->last; srq->last = wqe; - ((struct mthca_next_seg *) wqe)->nda_op = 0; ((struct mthca_next_seg *) wqe)->ee_nds = 0; /* flags field will always remain 0 */ @@ -468,48 +519,55 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + mthca_set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mthca_data_seg); } - if (i < srq->max_gs) { - ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); - ((struct mthca_data_seg *) wqe)->addr = 0; - } + if (i < srq->max_gs) + mthca_set_data_seg_inval(wqe); - ((struct mthca_next_seg *) prev_wqe)->nda_op = - cpu_to_be32((ind << srq->wqe_shift) | 1); - wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = cpu_to_be32(MTHCA_NEXT_DBD); srq->wrid[ind] = wr->wr_id; srq->first_free = next_ind; - } - if (likely(nreq)) { - __be32 doorbell[2]; + ++nreq; + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + /* + * Make sure that descriptors are written + * before doorbell is rung. + */ + wmb(); + + mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); - doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); - doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq); + first_ind = srq->first_free; + } + } + if (likely(nreq)) { /* * Make sure that descriptors are written before * doorbell is rung. */ wmb(); - mthca_write64(doorbell, + mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } + /* + * Make sure doorbells don't leak out of SRQ spinlock and + * reach the HCA out of order: + */ + mmiowb(); + spin_unlock_irqrestore(&srq->lock, flags); return err; } @@ -530,27 +588,17 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, spin_lock_irqsave(&srq->lock, flags); for (nreq = 0; wr; ++nreq, wr = wr->next) { - ind = srq->first_free; - - if (ind < 0) { - mthca_err(dev, "SRQ %06x full\n", srq->srqn); - err = -ENOMEM; - *bad_wr = wr; - break; - } - + ind = srq->first_free; wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); - if (next_ind < 0) { + if (unlikely(next_ind < 0)) { mthca_err(dev, "SRQ %06x full\n", srq->srqn); err = -ENOMEM; *bad_wr = wr; break; } - ((struct mthca_next_seg *) wqe)->nda_op = - cpu_to_be32((next_ind << srq->wqe_shift) | 1); ((struct mthca_next_seg *) wqe)->ee_nds = 0; /* flags field will always remain 0 */ @@ -563,20 +611,12 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + mthca_set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mthca_data_seg); } - if (i < srq->max_gs) { - ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); - ((struct mthca_data_seg *) wqe)->addr = 0; - } + if (i < srq->max_gs) + mthca_set_data_seg_inval(wqe); srq->wrid[ind] = wr->wr_id; srq->first_free = next_ind; @@ -597,7 +637,32 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, return err; } -int __devinit mthca_init_srq_table(struct mthca_dev *dev) +int mthca_max_srq_sge(struct mthca_dev *dev) +{ + if (mthca_is_memfree(dev)) + return dev->limits.max_sg; + + /* + * SRQ allocations are based on powers of 2 for Tavor, + * (although they only need to be multiples of 16 bytes). + * + * Therefore, we need to base the max number of sg entries on + * the largest power of 2 descriptor size that is <= to the + * actual max WQE descriptor size, rather than return the + * max_sg value given by the firmware (which is based on WQE + * sizes as multiples of 16, not powers of 2). + * + * If SRQ implementation is changed for Tavor to be based on + * multiples of 16, the calculation below can be deleted and + * the FW max_sg value returned. + */ + return min_t(int, dev->limits.max_sg, + ((1 << (fls(dev->limits.max_desc_sz) - 1)) - + sizeof (struct mthca_next_seg)) / + sizeof (struct mthca_data_seg)); +} + +int mthca_init_srq_table(struct mthca_dev *dev) { int err; @@ -621,7 +686,7 @@ int __devinit mthca_init_srq_table(struct mthca_dev *dev) return err; } -void __devexit mthca_cleanup_srq_table(struct mthca_dev *dev) +void mthca_cleanup_srq_table(struct mthca_dev *dev) { if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) return; |
