diff options
Diffstat (limited to 'drivers/infiniband/hw/qib/qib_file_ops.c')
| -rw-r--r-- | drivers/infiniband/hw/qib/qib_file_ops.c | 251 |
1 files changed, 169 insertions, 82 deletions
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 75bfad16c11..b15e34eeef6 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -39,15 +39,19 @@ #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/io.h> -#include <linux/uio.h> +#include <linux/aio.h> #include <linux/jiffies.h> #include <asm/pgtable.h> #include <linux/delay.h> +#include <linux/export.h> #include "qib.h" #include "qib_common.h" #include "qib_user_sdma.h" +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + static int qib_open(struct inode *, struct file *); static int qib_close(struct inode *, struct file *); static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *); @@ -314,8 +318,9 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, } if (cnt > tidcnt) { /* make sure it all fits in tid_pg_list */ - qib_devinfo(dd->pcidev, "Process tried to allocate %u " - "TIDs, only trying max (%u)\n", cnt, tidcnt); + qib_devinfo(dd->pcidev, + "Process tried to allocate %u TIDs, only trying max (%u)\n", + cnt, tidcnt); cnt = tidcnt; } pagep = (struct page **) rcd->tid_pg_list; @@ -749,9 +754,9 @@ static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd, ret = remap_pfn_range(vma, vma->vm_start, pfn, len, vma->vm_page_prot); if (ret) - qib_devinfo(dd->pcidev, "%s ctxt%u mmap of %lx, %x " - "bytes failed: %d\n", what, rcd->ctxt, - pfn, len, ret); + qib_devinfo(dd->pcidev, + "%s ctxt%u mmap of %lx, %x bytes failed: %d\n", + what, rcd->ctxt, pfn, len, ret); bail: return ret; } @@ -770,8 +775,9 @@ static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd, */ sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE; if ((vma->vm_end - vma->vm_start) > sz) { - qib_devinfo(dd->pcidev, "FAIL mmap userreg: reqlen " - "%lx > PAGE\n", vma->vm_end - vma->vm_start); + qib_devinfo(dd->pcidev, + "FAIL mmap userreg: reqlen %lx > PAGE\n", + vma->vm_end - vma->vm_start); ret = -EFAULT; } else { phys = dd->physaddr + ureg; @@ -801,8 +807,8 @@ static int mmap_piobufs(struct vm_area_struct *vma, * for it. */ if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) { - qib_devinfo(dd->pcidev, "FAIL mmap piobufs: " - "reqlen %lx > PAGE\n", + qib_devinfo(dd->pcidev, + "FAIL mmap piobufs: reqlen %lx > PAGE\n", vma->vm_end - vma->vm_start); ret = -EINVAL; goto bail; @@ -846,8 +852,8 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, size = rcd->rcvegrbuf_size; total_size = rcd->rcvegrbuf_chunks * size; if ((vma->vm_end - vma->vm_start) > total_size) { - qib_devinfo(dd->pcidev, "FAIL on egr bufs: " - "reqlen %lx > actual %lx\n", + qib_devinfo(dd->pcidev, + "FAIL on egr bufs: reqlen %lx > actual %lx\n", vma->vm_end - vma->vm_start, (unsigned long) total_size); ret = -EINVAL; @@ -855,8 +861,9 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, } if (vma->vm_flags & VM_WRITE) { - qib_devinfo(dd->pcidev, "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); + qib_devinfo(dd->pcidev, + "Can't map eager buffers as writable (flags=%lx)\n", + vma->vm_flags); ret = -EPERM; goto bail; } @@ -964,7 +971,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &qib_file_vm_ops; - vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; ret = 1; bail: @@ -1148,6 +1155,49 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt) return pollflag; } +static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) +{ + struct qib_filedata *fd = fp->private_data; + const unsigned int weight = cpumask_weight(¤t->cpus_allowed); + const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); + int local_cpu; + + /* + * If process has NOT already set it's affinity, select and + * reserve a processor for it on the local NUMA node. + */ + if ((weight >= qib_cpulist_count) && + (cpumask_weight(local_mask) <= qib_cpulist_count)) { + for_each_cpu(local_cpu, local_mask) + if (!test_and_set_bit(local_cpu, qib_cpulist)) { + fd->rec_cpu_num = local_cpu; + return; + } + } + + /* + * If process has NOT already set it's affinity, select and + * reserve a processor for it, as a rendevous for all + * users of the driver. If they don't actually later + * set affinity to this cpu, or set it to some other cpu, + * it just means that sooner or later we don't recommend + * a cpu, and let the scheduler do it's best. + */ + if (weight >= qib_cpulist_count) { + int cpu; + cpu = find_first_zero_bit(qib_cpulist, + qib_cpulist_count); + if (cpu == qib_cpulist_count) + qib_dev_err(dd, + "no cpus avail for affinity PID %u\n", + current->pid); + else { + __set_bit(cpu, qib_cpulist); + fd->rec_cpu_num = cpu; + } + } +} + /* * Check that userland and driver are compatible for subcontexts. */ @@ -1170,7 +1220,7 @@ static int qib_compatible_subctxts(int user_swmajor, int user_swminor) return user_swminor == 3; default: /* >= 4 are compatible (or are expected to be) */ - return user_swminor >= 4; + return user_swminor <= QIB_USER_SWMINOR; } } /* make no promises yet for future major versions */ @@ -1252,12 +1302,20 @@ bail: static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, struct file *fp, const struct qib_user_info *uinfo) { + struct qib_filedata *fd = fp->private_data; struct qib_devdata *dd = ppd->dd; struct qib_ctxtdata *rcd; void *ptmp = NULL; int ret; + int numa_id; + + assign_ctxt_affinity(fp, dd); - rcd = qib_create_ctxtdata(ppd, ctxt); + numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ? + cpu_to_node(fd->rec_cpu_num) : + numa_node_id()) : dd->assigned_node_id; + + rcd = qib_create_ctxtdata(ppd, ctxt, numa_id); /* * Allocate memory for use in qib_tid_update() at open to @@ -1269,8 +1327,8 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, GFP_KERNEL); if (!rcd || !ptmp) { - qib_dev_err(dd, "Unable to allocate ctxtdata " - "memory, failing open\n"); + qib_dev_err(dd, + "Unable to allocate ctxtdata memory, failing open\n"); ret = -ENOMEM; goto bailerr; } @@ -1284,10 +1342,14 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, strlcpy(rcd->comm, current->comm, sizeof(rcd->comm)); ctxt_fp(fp) = rcd; qib_stats.sps_ctxts++; + dd->freectxts--; ret = 0; goto bail; bailerr: + if (fd->rec_cpu_num != -1) + __clear_bit(fd->rec_cpu_num, qib_cpulist); + dd->rcd[ctxt] = NULL; kfree(rcd); kfree(ptmp); @@ -1397,7 +1459,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo, cused++; else cfree++; - if (pusable && cfree && cused < inuse) { + if (cfree && cused < inuse) { udd = dd; inuse = cused; } @@ -1477,6 +1539,58 @@ static int qib_open(struct inode *in, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } +static int find_hca(unsigned int cpu, int *unit) +{ + int ret = 0, devmax, npresent, nup, ndev; + + *unit = -1; + + devmax = qib_count_units(&npresent, &nup); + if (!npresent) { + ret = -ENXIO; + goto done; + } + if (!nup) { + ret = -ENETDOWN; + goto done; + } + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + if (dd) { + if (pcibus_to_node(dd->pcidev->bus) < 0) { + ret = -EINVAL; + goto done; + } + if (cpu_to_node(cpu) == + pcibus_to_node(dd->pcidev->bus)) { + *unit = ndev; + goto done; + } + } + } +done: + return ret; +} + +static int do_qib_user_sdma_queue_create(struct file *fp) +{ + struct qib_filedata *fd = fp->private_data; + struct qib_ctxtdata *rcd = fd->rcd; + struct qib_devdata *dd = rcd->dd; + + if (dd->flags & QIB_HAS_SEND_DMA) { + + fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, + dd->unit, + rcd->ctxt, + fd->subctxt); + if (!fd->pq) + return -ENOMEM; + } + + return 0; +} + /* * Get ctxt early, so can set affinity prior to memory allocation. */ @@ -1509,60 +1623,36 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) if (qib_compatible_subctxts(swmajor, swminor) && uinfo->spu_subctxt_cnt) { ret = find_shared_ctxt(fp, uinfo); - if (ret) { - if (ret > 0) - ret = 0; - goto done_chk_sdma; + if (ret > 0) { + ret = do_qib_user_sdma_queue_create(fp); + if (!ret) + assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd); + goto done_ok; } } - i_minor = iminor(fp->f_dentry->d_inode) - QIB_USER_MINOR_BASE; + i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; if (i_minor) ret = find_free_ctxt(i_minor - 1, fp, uinfo); - else + else { + int unit; + const unsigned int cpu = cpumask_first(¤t->cpus_allowed); + const unsigned int weight = + cpumask_weight(¤t->cpus_allowed); + + if (weight == 1 && !test_bit(cpu, qib_cpulist)) + if (!find_hca(cpu, &unit) && unit >= 0) + if (!find_free_ctxt(unit, fp, uinfo)) { + ret = 0; + goto done_chk_sdma; + } ret = get_a_ctxt(fp, uinfo, alg); - -done_chk_sdma: - if (!ret) { - struct qib_filedata *fd = fp->private_data; - const struct qib_ctxtdata *rcd = fd->rcd; - const struct qib_devdata *dd = rcd->dd; - - if (dd->flags & QIB_HAS_SEND_DMA) { - fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, - dd->unit, - rcd->ctxt, - fd->subctxt); - if (!fd->pq) - ret = -ENOMEM; - } - - /* - * If process has NOT already set it's affinity, select and - * reserve a processor for it, as a rendevous for all - * users of the driver. If they don't actually later - * set affinity to this cpu, or set it to some other cpu, - * it just means that sooner or later we don't recommend - * a cpu, and let the scheduler do it's best. - */ - if (!ret && cpus_weight(current->cpus_allowed) >= - qib_cpulist_count) { - int cpu; - cpu = find_first_zero_bit(qib_cpulist, - qib_cpulist_count); - if (cpu != qib_cpulist_count) { - __set_bit(cpu, qib_cpulist); - fd->rec_cpu_num = cpu; - } - } else if (cpus_weight(current->cpus_allowed) == 1 && - test_bit(first_cpu(current->cpus_allowed), - qib_cpulist)) - qib_devinfo(dd->pcidev, "%s PID %u affinity " - "set to cpu %d; already allocated\n", - current->comm, current->pid, - first_cpu(current->cpus_allowed)); } +done_chk_sdma: + if (!ret) + ret = do_qib_user_sdma_queue_create(fp); +done_ok: mutex_unlock(&qib_mutex); done: @@ -1657,7 +1747,7 @@ static int qib_do_user_init(struct file *fp, * 0 to 1. So for those chips, we turn it off and then back on. * This will (very briefly) affect any other open ctxts, but the * duration is very short, and therefore isn't an issue. We - * explictly set the in-memory tail copy to 0 beforehand, so we + * explicitly set the in-memory tail copy to 0 beforehand, so we * don't have to wait to be sure the DMA update has happened * (chip resets head/tail to 0 on transition to enable). */ @@ -1791,6 +1881,7 @@ static int qib_close(struct inode *in, struct file *fp) if (dd->pageshadow) unlock_expected_tids(rcd); qib_stats.sps_ctxts--; + dd->freectxts++; } mutex_unlock(&qib_mutex); @@ -1904,8 +1995,9 @@ int qib_set_uevent_bits(struct qib_pportdata *ppd, const int evtbit) struct qib_ctxtdata *rcd; unsigned ctxt; int ret = 0; + unsigned long flags; - spin_lock(&ppd->dd->uctxt_lock); + spin_lock_irqsave(&ppd->dd->uctxt_lock, flags); for (ctxt = ppd->dd->first_user_ctxt; ctxt < ppd->dd->cfgctxts; ctxt++) { rcd = ppd->dd->rcd[ctxt]; @@ -1924,7 +2016,7 @@ int qib_set_uevent_bits(struct qib_pportdata *ppd, const int evtbit) ret = 1; break; } - spin_unlock(&ppd->dd->uctxt_lock); + spin_unlock_irqrestore(&ppd->dd->uctxt_lock, flags); return ret; } @@ -2180,8 +2272,7 @@ int qib_cdev_init(int minor, const char *name, cdev = cdev_alloc(); if (!cdev) { - printk(KERN_ERR QIB_DRV_NAME - ": Could not allocate cdev for minor %d, %s\n", + pr_err("Could not allocate cdev for minor %d, %s\n", minor, name); ret = -ENOMEM; goto done; @@ -2193,19 +2284,17 @@ int qib_cdev_init(int minor, const char *name, ret = cdev_add(cdev, dev, 1); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME - ": Could not add cdev for minor %d, %s (err %d)\n", + pr_err("Could not add cdev for minor %d, %s (err %d)\n", minor, name, -ret); goto err_cdev; } - device = device_create(qib_class, NULL, dev, NULL, name); + device = device_create(qib_class, NULL, dev, NULL, "%s", name); if (!IS_ERR(device)) goto done; ret = PTR_ERR(device); device = NULL; - printk(KERN_ERR QIB_DRV_NAME ": Could not create " - "device for minor %d, %s (err %d)\n", + pr_err("Could not create device for minor %d, %s (err %d)\n", minor, name, -ret); err_cdev: cdev_del(cdev); @@ -2240,16 +2329,14 @@ int __init qib_dev_init(void) ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME ": Could not allocate " - "chrdev region (err %d)\n", -ret); + pr_err("Could not allocate chrdev region (err %d)\n", -ret); goto done; } qib_class = class_create(THIS_MODULE, "ipath"); if (IS_ERR(qib_class)) { ret = PTR_ERR(qib_class); - printk(KERN_ERR QIB_DRV_NAME ": Could not create " - "device class (err %d)\n", -ret); + pr_err("Could not create device class (err %d)\n", -ret); unregister_chrdev_region(qib_dev, QIB_NMINORS); } |
