diff options
Diffstat (limited to 'drivers/infiniband/hw/qib')
42 files changed, 5641 insertions, 2124 deletions
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig index 7c03a70c55a..495be09781b 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -1,7 +1,15 @@ config INFINIBAND_QIB - tristate "QLogic PCIe HCA support" - depends on 64BIT && NET + tristate "Intel PCIe HCA support" + depends on 64BIT ---help--- - This is a low-level driver for QLogic PCIe QLE InfiniBand host - channel adapters. This driver does not support the QLogic + This is a low-level driver for Intel PCIe QLE InfiniBand host + channel adapters. This driver does not support the Intel HyperTransport card (model QHT7140). + +config INFINIBAND_QIB_DCA + bool "QIB DCA support" + depends on INFINIBAND_QIB && DCA && SMP && !(INFINIBAND_QIB=y && DCA=m) + default y + ---help--- + Setting this enables DCA support on some Intel chip sets + with the iba7322 HCA. diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index f12d7bb8b39..57f8103e51f 100644 --- a/drivers/infiniband/hw/qib/Makefile +++ b/drivers/infiniband/hw/qib/Makefile @@ -13,3 +13,4 @@ ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o +ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 64c9e7d02d4..c00ae093b6f 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1,8 +1,8 @@ #ifndef _QIB_KERNEL_H #define _QIB_KERNEL_H /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -51,6 +51,7 @@ #include <linux/completion.h> #include <linux/kref.h> #include <linux/sched.h> +#include <linux/kthread.h> #include "qib_common.h" #include "qib_verbs.h" @@ -87,8 +88,7 @@ struct qlogic_ib_stats { }; extern struct qlogic_ib_stats qib_stats; -extern struct pci_error_handlers qib_pci_err_handler; -extern struct pci_driver qib_driver; +extern const struct pci_error_handlers qib_pci_err_handler; #define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ /* @@ -114,6 +114,11 @@ struct qib_eep_log_mask { /* * Below contains all data related to a single context (formerly called port). */ + +#ifdef CONFIG_DEBUG_FS +struct qib_opcode_stats_perctx; +#endif + struct qib_ctxtdata { void **rcvegrbuf; dma_addr_t *rcvegrbuf_phys; @@ -154,6 +159,8 @@ struct qib_ctxtdata { */ /* instead of calculating it */ unsigned ctxt; + /* local node of context */ + int node_id; /* non-zero if ctxt is being shared. */ u16 subctxt_cnt; /* non-zero if ctxt is being shared. */ @@ -171,7 +178,9 @@ struct qib_ctxtdata { /* how many alloc_pages() chunks in rcvegrbuf_pages */ u32 rcvegrbuf_chunks; /* how many egrbufs per chunk */ - u32 rcvegrbufs_perchunk; + u16 rcvegrbufs_perchunk; + /* ilog2 of above */ + u16 rcvegrbufs_perchunk_shift; /* order for rcvegrbuf_pages */ size_t rcvegrbuf_size; /* rcvhdrq size (for freeing) */ @@ -220,9 +229,15 @@ struct qib_ctxtdata { u8 redirect_seq_cnt; /* ctxt rcvhdrq head offset */ u32 head; - u32 pkt_count; + /* lookaside fields */ + struct qib_qp *lookaside_qp; + u32 lookaside_qpn; /* QPs waiting for context processing */ struct list_head qp_wait_list; +#ifdef CONFIG_DEBUG_FS + /* verbs stats per CTX */ + struct qib_opcode_stats_perctx *opstats; +#endif }; struct qib_sge_state; @@ -422,6 +437,24 @@ struct qib_verbs_txreq { /* how often we check for packet activity for "power on hours (in seconds) */ #define ACTIVITY_TIMER 5 +#define MAX_NAME_SIZE 64 + +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify; +#endif + +struct qib_msix_entry { + struct msix_entry msix; + void *arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca; + int rcv; + struct qib_irq_notify *notifier; +#endif + char name[MAX_NAME_SIZE]; + cpumask_var_t mask; +}; + /* Below is an opaque struct. Each chip (device) can maintain * private data needed for its operation, but not germane to the * rest of the driver. For convenience, we define another that @@ -506,6 +539,7 @@ struct qib_pportdata { struct qib_devdata *dd; struct qib_chippport_specific *cpspec; /* chip-specific per-port */ struct kobject pport_kobj; + struct kobject pport_cc_kobj; struct kobject sl2vl_kobj; struct kobject diagc_kobj; @@ -517,8 +551,6 @@ struct qib_pportdata { /* qib_lflags driver is waiting for */ u32 state_wanted; spinlock_t lflags_lock; - /* number of (port-specific) interrupts for this port -- saturates... */ - u32 int_counter; /* ref count for each pkey */ atomic_t pkeyrefs[4]; @@ -530,24 +562,29 @@ struct qib_pportdata { u64 *statusp; /* SendDMA related entries */ - spinlock_t sdma_lock; - struct qib_sdma_state sdma_state; - unsigned long sdma_buf_jiffies; + + /* read mostly */ struct qib_sdma_desc *sdma_descq; + struct workqueue_struct *qib_wq; + struct qib_sdma_state sdma_state; + dma_addr_t sdma_descq_phys; + volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ + dma_addr_t sdma_head_phys; + u16 sdma_descq_cnt; + + /* read/write using lock */ + spinlock_t sdma_lock ____cacheline_aligned_in_smp; + struct list_head sdma_activelist; + struct list_head sdma_userpending; u64 sdma_descq_added; u64 sdma_descq_removed; - u16 sdma_descq_cnt; u16 sdma_descq_tail; u16 sdma_descq_head; - u16 sdma_next_intr; - u16 sdma_reset_wait; u8 sdma_generation; - struct tasklet_struct sdma_sw_clean_up_task; - struct list_head sdma_activelist; + u8 sdma_intrequest; - dma_addr_t sdma_descq_phys; - volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ - dma_addr_t sdma_head_phys; + struct tasklet_struct sdma_sw_clean_up_task + ____cacheline_aligned_in_smp; wait_queue_head_t state_wait; /* for state_wanted */ @@ -624,6 +661,39 @@ struct qib_pportdata { struct timer_list led_override_timer; struct xmit_wait cong_stats; struct timer_list symerr_clear_timer; + + /* Synchronize access between driver writes and sysfs reads */ + spinlock_t cc_shadow_lock + ____cacheline_aligned_in_smp; + + /* Shadow copy of the congestion control table */ + struct cc_table_shadow *ccti_entries_shadow; + + /* Shadow copy of the congestion control entries */ + struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow; + + /* List of congestion control table entries */ + struct ib_cc_table_entry_shadow *ccti_entries; + + /* 16 congestion entries with each entry corresponding to a SL */ + struct ib_cc_congestion_entry_shadow *congestion_entries; + + /* Maximum number of congestion control entries that the agent expects + * the manager to send. + */ + u16 cc_supported_table_entries; + + /* Total number of congestion control table entries */ + u16 total_cct_entry; + + /* Bit map identifying service level */ + u16 cc_sl_control_map; + + /* maximum congestion control table index */ + u16 ccti_limit; + + /* CA's max number of 64 entry units in the congestion control table */ + u8 cc_max_table_entries; }; /* Observers. Not to be taken lightly, possibly not to ship. */ @@ -653,7 +723,7 @@ struct diag_observer_list_elt; /* device data struct now contains only "general per-device" info. * fields related to a physical IB port are in a qib_pportdata struct, - * described above) while fields only used by a particualr chip-type are in + * described above) while fields only used by a particular chip-type are in * a qib_chipdata struct, whose contents are opaque to this file. */ struct qib_devdata { @@ -766,7 +836,7 @@ struct qib_devdata { void (*f_sdma_hw_start_up)(struct qib_pportdata *); void (*f_sdma_init_early)(struct qib_pportdata *); void (*f_set_cntr_sample)(struct qib_pportdata *, u32, u32); - void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32); + void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32, u32); u32 (*f_hdrqempty)(struct qib_ctxtdata *); u64 (*f_portcntr)(struct qib_pportdata *, u32); u32 (*f_read_cntrs)(struct qib_devdata *, loff_t, char **, @@ -780,6 +850,9 @@ struct qib_devdata { struct qib_ctxtdata *); void (*f_writescratch)(struct qib_devdata *, u32); int (*f_tempsense_rd)(struct qib_devdata *, int regnum); +#ifdef CONFIG_INFINIBAND_QIB_DCA + int (*f_notify_dca)(struct qib_devdata *, unsigned long event); +#endif char *boardname; /* human readable board info */ @@ -795,8 +868,10 @@ struct qib_devdata { /* last buffer for user use */ u32 lastctxt_piobuf; - /* saturating counter of (non-port-specific) device interrupts */ - u32 int_counter; + /* reset value */ + u64 z_int_counter; + /* percpu intcounter */ + u64 __percpu *int_counter; /* pio bufs allocated per ctxt */ u32 pbufsctxt; @@ -807,6 +882,10 @@ struct qib_devdata { * supports, less gives more pio bufs/ctxt, etc. */ u32 cfgctxts; + /* + * number of ctxts available for PSM open + */ + u32 freectxts; /* * hint that we should update pioavailshadow before @@ -856,7 +935,14 @@ struct qib_devdata { * pio_writing. */ spinlock_t pioavail_lock; - + /* + * index of last buffer to optimize search for next + */ + u32 last_pio; + /* + * min kernel pio buffer to optimize search + */ + u32 min_kernel_pio; /* * Shadow copies of registers; size indicates read access size. * Most of them are readonly, but some are write-only register, @@ -936,7 +1022,9 @@ struct qib_devdata { /* chip address space used by 4k pio buffers */ u32 align4k; /* size of each rcvegrbuffer */ - u32 rcvegrbufsize; + u16 rcvegrbufsize; + /* log2 of above */ + u16 rcvegrbufsize_shift; /* localbus width (1, 2,4,8,16,32) from config space */ u32 lbus_width; /* localbus speed in MHz */ @@ -1012,6 +1100,12 @@ struct qib_devdata { u8 psxmitwait_supported; /* cycle length of PS* counters in HW (in picoseconds) */ u16 psxmitwait_check_rate; + /* high volume overflow errors defered to tasklet */ + struct tasklet_struct error_tasklet; + /* per device cq worker */ + struct kthread_worker *worker; + + int assigned_node_id; /* NUMA node closest to HCA */ }; /* hol_state values */ @@ -1049,6 +1143,7 @@ extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; extern unsigned qib_wc_pat; +extern unsigned qib_cc_table_size; int qib_init(struct qib_devdata *, int); int init_chip_wc_pat(struct qib_devdata *dd, u32); int qib_enable_wc(struct qib_devdata *dd); @@ -1090,8 +1185,8 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *); int qib_setup_eagerbufs(struct qib_ctxtdata *); void qib_set_ctxtcnt(struct qib_devdata *); int qib_create_ctxts(struct qib_devdata *dd); -struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32); -void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int); +int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *); @@ -1234,11 +1329,18 @@ int qib_setup_sdma(struct qib_pportdata *); void qib_teardown_sdma(struct qib_pportdata *); void __qib_sdma_intr(struct qib_pportdata *); void qib_sdma_intr(struct qib_pportdata *); +void qib_user_sdma_send_desc(struct qib_pportdata *dd, + struct list_head *pktlist); int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *, u32, struct qib_verbs_txreq *); /* ppd->sdma_lock should be locked before calling this. */ int qib_sdma_make_progress(struct qib_pportdata *dd); +static inline int qib_sdma_empty(const struct qib_pportdata *ppd) +{ + return ppd->sdma_descq_added == ppd->sdma_descq_removed; +} + /* must be called under qib_sdma_lock */ static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd) { @@ -1251,7 +1353,7 @@ static inline int __qib_sdma_running(struct qib_pportdata *ppd) return ppd->sdma_state.current_state == qib_sdma_state_s99_running; } int qib_sdma_running(struct qib_pportdata *); - +void dump_sdma_state(struct qib_pportdata *ppd); void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); @@ -1342,13 +1444,17 @@ int qib_pcie_init(struct pci_dev *, const struct pci_device_id *); int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *, const struct pci_device_id *); void qib_pcie_ddcleanup(struct qib_devdata *); -int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct msix_entry *); +int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *); int qib_reinit_intr(struct qib_devdata *); void qib_enable_intx(struct pci_dev *); void qib_nomsi(struct qib_devdata *); void qib_nomsix(struct qib_devdata *); void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8); +/* interrupts for device */ +u64 qib_int_counter(struct qib_devdata *); +/* interrupt for all devices */ +u64 qib_sps_ints(void); /* * dma_addr wrappers - all 0's invalid for hw @@ -1376,6 +1482,7 @@ extern unsigned qib_n_krcv_queues; extern unsigned qib_sdma_fetch_arb; extern unsigned qib_compat_ddr_negotiate; extern int qib_special_trigger; +extern unsigned qib_numa_aware; extern struct mutex qib_mutex; @@ -1405,27 +1512,23 @@ extern struct mutex qib_mutex; * first to avoid possible serial port delays from printk. */ #define qib_early_err(dev, fmt, ...) \ - do { \ - dev_err(dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_err(dev, fmt, ##__VA_ARGS__) #define qib_dev_err(dd, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ - qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \ - } while (0) + dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) + +#define qib_dev_warn(dd, fmt, ...) \ + dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) #define qib_dev_porterr(dd, port, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ - qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ - ##__VA_ARGS__); \ - } while (0) + dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ + qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ + ##__VA_ARGS__) #define qib_devinfo(pcidev, fmt, ...) \ - do { \ - dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__) /* * this is used for formatting hw error messages... @@ -1433,6 +1536,7 @@ extern struct mutex qib_mutex; struct qib_hwerror_msgs { u64 mask; const char *msg; + size_t sz; }; #define QLOGIC_IB_HWE_MSG(a, b) { .mask = a, .msg = b } diff --git a/drivers/infiniband/hw/qib/qib_7220.h b/drivers/infiniband/hw/qib/qib_7220.h index 21f374aa063..a5356cb4252 100644 --- a/drivers/infiniband/hw/qib/qib_7220.h +++ b/drivers/infiniband/hw/qib/qib_7220.h @@ -97,7 +97,7 @@ struct qib_chippport_specific { u64 iblnkerrsnap; u64 ibcctrl; /* kr_ibcctrl shadow */ u64 ibcddrctrl; /* kr_ibcddrctrl shadow */ - u64 chase_end; + unsigned long chase_end; u32 last_delay_mult; }; diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h index 145da404088..5670ace27c6 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -279,13 +279,12 @@ struct qib_base_info { * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define QIB_USER_SWMINOR 11 +#define QIB_USER_SWMINOR 13 #define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR) #ifndef QIB_KERN_TYPE #define QIB_KERN_TYPE 0 -#define QIB_IDSTR "QLogic kernel.org driver" #endif /* @@ -302,6 +301,19 @@ struct qib_base_info { #define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION) /* + * Define the driver version number. This is something that refers only + * to the driver itself, not the software interfaces it supports. + */ +#define QIB_DRIVER_VERSION_BASE "1.11" + +/* create the final driver version string */ +#ifdef QIB_IDSTR +#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE " " QIB_IDSTR +#else +#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE +#endif + +/* * If the unit is specified via open, HCA choice is fixed. If port is * specified, it's also fixed. Otherwise we try to spread contexts * across ports and HCAs, using different algorithims. WITHIN is @@ -689,7 +701,37 @@ struct qib_message_header { __be32 bth[3]; /* fields below this point are in host byte order */ struct qib_header iph; + /* fields below are simplified, but should match PSM */ + /* some are accessed by driver when packet spliting is needed */ __u8 sub_opcode; + __u8 flags; + __u16 commidx; + __u32 ack_seq_num; + __u8 flowid; + __u8 hdr_dlen; + __u16 mqhdr; + __u32 uwords[4]; +}; + +/* sequence number bits for message */ +union qib_seqnum { + struct { + __u32 seq:11; + __u32 gen:8; + __u32 flow:5; + }; + struct { + __u32 pkt:16; + __u32 msg:8; + }; + __u32 val; +}; + +/* qib receiving-dma tid-session-member */ +struct qib_tid_session_member { + __u16 tid; + __u16 offset; + __u16 length; }; /* IB - LRH header consts */ diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c index a86cbf880f9..ab4e11cfab1 100644 --- a/drivers/infiniband/hw/qib/qib_cq.c +++ b/drivers/infiniband/hw/qib/qib_cq.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -34,8 +35,10 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/kthread.h> #include "qib_verbs.h" +#include "qib.h" /** * qib_cq_enter - add a new entry to the completion queue @@ -100,14 +103,20 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited) wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || - (cq->notify == IB_CQ_SOLICITED && solicited)) { - cq->notify = IB_CQ_NONE; - cq->triggered++; + (cq->notify == IB_CQ_SOLICITED && + (solicited || entry->status != IB_WC_SUCCESS))) { + struct kthread_worker *worker; /* * This will cause send_complete() to be called in * another thread. */ - queue_work(qib_cq_wq, &cq->comptask); + smp_rmb(); + worker = cq->dd->worker; + if (likely(worker)) { + cq->notify = IB_CQ_NONE; + cq->triggered++; + queue_kthread_work(worker, &cq->comptask); + } } spin_unlock_irqrestore(&cq->lock, flags); @@ -162,7 +171,7 @@ bail: return npolled; } -static void send_complete(struct work_struct *work) +static void send_complete(struct kthread_work *work) { struct qib_cq *cq = container_of(work, struct qib_cq, comptask); @@ -286,11 +295,12 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, * The number of entries should be >= the number requested or return * an error. */ + cq->dd = dd_from_dev(dev); cq->ibcq.cqe = entries; cq->notify = IB_CQ_NONE; cq->triggered = 0; spin_lock_init(&cq->lock); - INIT_WORK(&cq->comptask, send_complete); + init_kthread_work(&cq->comptask, send_complete); wc->head = 0; wc->tail = 0; cq->queue = wc; @@ -322,7 +332,7 @@ int qib_destroy_cq(struct ib_cq *ibcq) struct qib_ibdev *dev = to_idev(ibcq->device); struct qib_cq *cq = to_icq(ibcq); - flush_work(&cq->comptask); + flush_kthread_work(&cq->comptask); spin_lock(&dev->n_cqs_lock); dev->n_cqs_allocated--; spin_unlock(&dev->n_cqs_lock); @@ -482,3 +492,49 @@ bail_free: bail: return ret; } + +int qib_cq_init(struct qib_devdata *dd) +{ + int ret = 0; + int cpu; + struct task_struct *task; + + if (dd->worker) + return 0; + dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL); + if (!dd->worker) + return -ENOMEM; + init_kthread_worker(dd->worker); + task = kthread_create_on_node( + kthread_worker_fn, + dd->worker, + dd->assigned_node_id, + "qib_cq%d", dd->unit); + if (IS_ERR(task)) + goto task_fail; + cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id)); + kthread_bind(task, cpu); + wake_up_process(task); +out: + return ret; +task_fail: + ret = PTR_ERR(task); + kfree(dd->worker); + dd->worker = NULL; + goto out; +} + +void qib_cq_exit(struct qib_devdata *dd) +{ + struct kthread_worker *worker; + + worker = dd->worker; + if (!worker) + return; + /* blocks future queuing from send_complete() */ + dd->worker = NULL; + smp_wmb(); + flush_kthread_worker(worker); + kthread_stop(worker->task); + kfree(worker); +} diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c new file mode 100644 index 00000000000..799a0c3bffc --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -0,0 +1,283 @@ +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/kernel.h> +#include <linux/export.h> + +#include "qib.h" +#include "qib_verbs.h" +#include "qib_debugfs.h" + +static struct dentry *qib_dbg_root; + +#define DEBUGFS_FILE(name) \ +static const struct seq_operations _##name##_seq_ops = { \ + .start = _##name##_seq_start, \ + .next = _##name##_seq_next, \ + .stop = _##name##_seq_stop, \ + .show = _##name##_seq_show \ +}; \ +static int _##name##_open(struct inode *inode, struct file *s) \ +{ \ + struct seq_file *seq; \ + int ret; \ + ret = seq_open(s, &_##name##_seq_ops); \ + if (ret) \ + return ret; \ + seq = s->private_data; \ + seq->private = inode->i_private; \ + return 0; \ +} \ +static const struct file_operations _##name##_file_ops = { \ + .owner = THIS_MODULE, \ + .open = _##name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = seq_release \ +}; + +#define DEBUGFS_FILE_CREATE(name) \ +do { \ + struct dentry *ent; \ + ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \ + ibd, &_##name##_file_ops); \ + if (!ent) \ + pr_warn("create of " #name " failed\n"); \ +} while (0) + +static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + + +static void _opcode_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _opcode_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + for (j = 0; j < dd->first_user_ctxt; j++) { + if (!dd->rcd[j]) + continue; + n_packets += dd->rcd[j]->opstats->stats[i].n_packets; + n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu\n", i, + (unsigned long long) n_packets, + (unsigned long long) n_bytes); + + return 0; +} + +DEBUGFS_FILE(opcode_stats) + +static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (!*pos) + return SEQ_START_TOKEN; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) + return pos; + + ++*pos; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void _ctx_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _ctx_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos; + loff_t i, j; + u64 n_packets = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) { + seq_puts(s, "Ctx:npkts\n"); + return 0; + } + + spos = v; + i = *spos; + + if (!dd->rcd[i]) + return SEQ_SKIP; + + for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++) + n_packets += dd->rcd[i]->opstats->stats[j].n_packets; + + if (!n_packets) + return SEQ_SKIP; + + seq_printf(s, " %llu:%llu\n", i, n_packets); + return 0; +} + +DEBUGFS_FILE(ctx_stats) + +static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_qp_iter *iter; + loff_t n = *pos; + + iter = qib_qp_iter_init(s->private); + if (!iter) + return NULL; + + while (n--) { + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + } + + return iter; +} + +static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, + loff_t *pos) +{ + struct qib_qp_iter *iter = iter_ptr; + + (*pos)++; + + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) +{ + /* nothing for now */ +} + +static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) +{ + struct qib_qp_iter *iter = iter_ptr; + + if (!iter) + return 0; + + qib_qp_iter_print(s, iter); + + return 0; +} + +DEBUGFS_FILE(qp_stats) + +void qib_dbg_ibdev_init(struct qib_ibdev *ibd) +{ + char name[10]; + + snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit); + ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root); + if (!ibd->qib_ibdev_dbg) { + pr_warn("create of %s failed\n", name); + return; + } + DEBUGFS_FILE_CREATE(opcode_stats); + DEBUGFS_FILE_CREATE(ctx_stats); + DEBUGFS_FILE_CREATE(qp_stats); + return; +} + +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd) +{ + if (!qib_dbg_root) + goto out; + debugfs_remove_recursive(ibd->qib_ibdev_dbg); +out: + ibd->qib_ibdev_dbg = NULL; +} + +void qib_dbg_init(void) +{ + qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL); + if (!qib_dbg_root) + pr_warn("init of debugfs failed\n"); +} + +void qib_dbg_exit(void) +{ + debugfs_remove_recursive(qib_dbg_root); + qib_dbg_root = NULL; +} + +#endif + diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h new file mode 100644 index 00000000000..7ae983a91b8 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.h @@ -0,0 +1,45 @@ +#ifndef _QIB_DEBUGFS_H +#define _QIB_DEBUGFS_H + +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +struct qib_ibdev; +void qib_dbg_ibdev_init(struct qib_ibdev *ibd); +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd); +void qib_dbg_init(void); +void qib_dbg_exit(void); + +#endif + +#endif /* _QIB_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 204c4dd9dce..5dfda4c5cc9 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2010 QLogic Corporation. All rights reserved. - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -46,12 +46,16 @@ #include <linux/pci.h> #include <linux/poll.h> #include <linux/vmalloc.h> +#include <linux/export.h> #include <linux/fs.h> #include <linux/uaccess.h> #include "qib.h" #include "qib_common.h" +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + /* * Each client that opens the diag device must read then write * offset 0, to prevent lossage from random cat or od. diag_state @@ -542,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp, size_t count, loff_t *off) { u32 __iomem *piobuf; - u32 plen, clen, pbufn; + u32 plen, pbufn, maxlen_reserve; struct qib_diag_xpkt dp; u32 *tmpbuf = NULL; struct qib_devdata *dd; @@ -586,19 +590,24 @@ static ssize_t qib_diagpkt_write(struct file *fp, } ppd = &dd->pport[dp.port - 1]; - /* need total length before first word written */ - /* +1 word is for the qword padding */ - plen = sizeof(u32) + dp.len; - clen = dp.len >> 2; - - if ((plen + 4) > ppd->ibmaxlen) { + /* + * need total length before first word written, plus 2 Dwords. One Dword + * is for padding so we get the full user data when not aligned on + * a word boundary. The other Dword is to make sure we have room for the + * ICRC which gets tacked on later. + */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > ppd->ibmaxlen - maxlen_reserve) { ret = -EINVAL; - goto bail; /* before writing pbc */ + goto bail; } + + plen = sizeof(u32) + dp.len; + tmpbuf = vmalloc(plen); if (!tmpbuf) { - qib_devinfo(dd->pcidev, "Unable to allocate tmp buffer, " - "failing\n"); + qib_devinfo(dd->pcidev, + "Unable to allocate tmp buffer, failing\n"); ret = -ENOMEM; goto bail; } @@ -634,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp, */ if (dd->flags & QIB_PIO_FLUSH_WC) { qib_flush_wc(); - qib_pio_copy(piobuf + 2, tmpbuf, clen - 1); + qib_pio_copy(piobuf + 2, tmpbuf, plen - 1); qib_flush_wc(); - __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); } else - qib_pio_copy(piobuf + 2, tmpbuf, clen); + qib_pio_copy(piobuf + 2, tmpbuf, plen); if (dd->flags & QIB_USE_SPCL_TRIG) { u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; @@ -685,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd, const struct diag_observer *op) { struct diag_observer_list_elt *olp; - int ret = -EINVAL; + unsigned long flags; if (!dd || !op) - goto bail; - ret = -ENOMEM; + return -EINVAL; olp = vmalloc(sizeof *olp); if (!olp) { - printk(KERN_ERR QIB_DRV_NAME ": vmalloc for observer failed\n"); - goto bail; + pr_err("vmalloc for observer failed\n"); + return -ENOMEM; } - if (olp) { - unsigned long flags; - spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); - olp->op = op; - olp->next = dd->diag_observer_list; - dd->diag_observer_list = olp; - spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); - ret = 0; - } -bail: - return ret; + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp->op = op; + olp->next = dd->diag_observer_list; + dd->diag_observer_list = olp; + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + + return 0; } /* Remove all registered observers when device is closed */ diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c index 2920bb39a65..59fe092b4b0 100644 --- a/drivers/infiniband/hw/qib/qib_dma.c +++ b/drivers/infiniband/hw/qib/qib_dma.c @@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl, ret = 0; break; } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } return ret; } @@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev, BUG_ON(!valid_dma_direction(direction)); } -static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - u64 addr = (u64) page_address(sg_page(sg)); - - if (addr) - addr += sg->offset; - return addr; -} - -static unsigned int qib_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg->length; -} - static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { @@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = { .unmap_page = qib_dma_unmap_page, .map_sg = qib_map_sg, .unmap_sg = qib_unmap_sg, - .dma_address = qib_sg_dma_address, - .dma_len = qib_sg_dma_len, .sync_single_for_cpu = qib_sync_single_for_cpu, .sync_single_for_device = qib_sync_single_for_device, .alloc_coherent = qib_dma_alloc_coherent, diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 9cd193603fb..5bee08f16d7 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -37,6 +38,8 @@ #include <linux/delay.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> +#include <linux/module.h> +#include <linux/prefetch.h> #include "qib.h" @@ -44,7 +47,7 @@ * The size has to be longer than this string, so we can append * board/chip information to it in the init code. */ -const char ib_qib_version[] = QIB_IDSTR "\n"; +const char ib_qib_version[] = QIB_DRIVER_VERSION "\n"; DEFINE_SPINLOCK(qib_devs_lock); LIST_HEAD(qib_dev_list); @@ -61,8 +64,9 @@ MODULE_PARM_DESC(compat_ddr_negotiate, "Attempt pre-IBTA 1.2 DDR speed negotiation"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("QLogic <support@qlogic.com>"); -MODULE_DESCRIPTION("QLogic IB driver"); +MODULE_AUTHOR("Intel <ibsupport@intel.com>"); +MODULE_DESCRIPTION("Intel IB driver"); +MODULE_VERSION(QIB_DRIVER_VERSION); /* * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our @@ -71,6 +75,11 @@ MODULE_DESCRIPTION("QLogic IB driver"); */ #define QIB_PIO_MAXIBHDR 128 +/* + * QIB_MAX_PKT_RCV is the max # if packets processed per receive interrupt. + */ +#define QIB_MAX_PKT_RECV 64 + struct qlogic_ib_stats qib_stats; const char *qib_get_unit_name(int unit) @@ -274,24 +283,151 @@ bail: */ static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail) { - const u32 chunk = etail / rcd->rcvegrbufs_perchunk; - const u32 idx = etail % rcd->rcvegrbufs_perchunk; + const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift; + const u32 idx = etail & ((u32)rcd->rcvegrbufs_perchunk - 1); - return rcd->rcvegrbuf[chunk] + idx * rcd->dd->rcvegrbufsize; + return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift); } /* * Returns 1 if error was a CRC, else 0. * Needed for some chip's synthesized error counters. */ -static u32 qib_rcv_hdrerr(struct qib_pportdata *ppd, u32 ctxt, - u32 eflags, u32 l, u32 etail, __le32 *rhf_addr, - struct qib_message_header *hdr) +static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, + u32 ctxt, u32 eflags, u32 l, u32 etail, + __le32 *rhf_addr, struct qib_message_header *rhdr) { u32 ret = 0; if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR)) ret = 1; + else if (eflags == QLOGIC_IB_RHF_H_TIDERR) { + /* For TIDERR and RC QPs premptively schedule a NAK */ + struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr; + struct qib_other_headers *ohdr = NULL; + struct qib_ibport *ibp = &ppd->ibport_data; + struct qib_qp *qp = NULL; + u32 tlen = qib_hdrget_length_in_bytes(rhf_addr); + u16 lid = be16_to_cpu(hdr->lrh[1]); + int lnh = be16_to_cpu(hdr->lrh[0]) & 3; + u32 qp_num; + u32 opcode; + u32 psn; + int diff; + + /* Sanity check packet */ + if (tlen < 24) + goto drop; + + if (lid < QIB_MULTICAST_LID_BASE) { + lid &= ~((1 << ppd->lmc) - 1); + if (unlikely(lid != ppd->lid)) + goto drop; + } + + /* Check for GRH */ + if (lnh == QIB_LRH_BTH) + ohdr = &hdr->u.oth; + else if (lnh == QIB_LRH_GRH) { + u32 vtf; + + ohdr = &hdr->u.l.oth; + if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + goto drop; + vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); + if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) + goto drop; + } else + goto drop; + + /* Get opcode and PSN from packet */ + opcode = be32_to_cpu(ohdr->bth[0]); + opcode >>= 24; + psn = be32_to_cpu(ohdr->bth[2]); + + /* Get the destination QP number. */ + qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; + if (qp_num != QIB_MULTICAST_QPN) { + int ruc_res; + qp = qib_lookup_qpn(ibp, qp_num); + if (!qp) + goto drop; + + /* + * Handle only RC QPs - for other QP types drop error + * packet. + */ + spin_lock(&qp->r_lock); + + /* Check for valid receive state. */ + if (!(ib_qib_state_ops[qp->state] & + QIB_PROCESS_RECV_OK)) { + ibp->n_pkt_drops++; + goto unlock; + } + + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + ruc_res = + qib_ruc_check_hdr( + ibp, hdr, + lnh == QIB_LRH_GRH, + qp, + be32_to_cpu(ohdr->bth[0])); + if (ruc_res) + goto unlock; + + /* Only deal with RDMA Writes for now */ + if (opcode < + IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { + diff = qib_cmp24(psn, qp->r_psn); + if (!qp->r_nak_state && diff >= 0) { + ibp->n_rc_seqnak++; + qp->r_nak_state = + IB_NAK_PSN_ERROR; + /* Use the expected PSN. */ + qp->r_ack_psn = qp->r_psn; + /* + * Wait to send the sequence + * NAK until all packets + * in the receive queue have + * been processed. + * Otherwise, we end up + * propagating congestion. + */ + if (list_empty(&qp->rspwait)) { + qp->r_flags |= + QIB_R_RSP_NAK; + atomic_inc( + &qp->refcount); + list_add_tail( + &qp->rspwait, + &rcd->qp_wait_list); + } + } /* Out of sequence NAK */ + } /* QP Request NAKs */ + break; + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_UD: + case IB_QPT_UC: + default: + /* For now don't handle any other QP types */ + break; + } + +unlock: + spin_unlock(&qp->r_lock); + /* + * Notify qib_destroy_qp() if it is waiting + * for us to finish. + */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } /* Unicast QP */ + } /* Valid packet with TIDErr */ + +drop: return ret; } @@ -335,7 +471,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) smp_rmb(); /* prevent speculative reads of dma'ed hdrq */ } - for (last = 0, i = 1; !last && i <= 64; i += !last) { + for (last = 0, i = 1; !last; i += !last) { hdr = dd->f_get_msgheader(dd, rhf_addr); eflags = qib_hdrget_err_flags(rhf_addr); etype = qib_hdrget_rcv_type(rhf_addr); @@ -348,8 +484,10 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) etail = qib_hdrget_index(rhf_addr); updegr = 1; if (tlen > sizeof(*hdr) || - etype >= RCVHQ_RCV_TYPE_NON_KD) + etype >= RCVHQ_RCV_TYPE_NON_KD) { ebuf = qib_get_egrbuf(rcd, etail); + prefetch_range(ebuf, tlen - sizeof(*hdr)); + } } if (!eflags) { u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2; @@ -371,7 +509,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) * packets; only qibhdrerr should be set. */ if (unlikely(eflags)) - crcs += qib_rcv_hdrerr(ppd, rcd->ctxt, eflags, l, + crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l, etail, rhf_addr, hdr); else if (etype == RCVHQ_RCV_TYPE_NON_KD) { qib_ib_rcv(rcd, hdr, ebuf, tlen); @@ -384,6 +522,9 @@ move_along: l += rsize; if (l >= maxcnt) l = 0; + if (i == QIB_MAX_PKT_RECV) + last = 1; + rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset; if (dd->flags & QIB_NODMA_RTAIL) { u32 seq = qib_hdrget_seq(rhf_addr); @@ -402,13 +543,21 @@ move_along: */ lval = l; if (!last && !(i & 0xf)) { - dd->f_update_usrhead(rcd, lval, updegr, etail); + dd->f_update_usrhead(rcd, lval, updegr, etail, i); updegr = 0; } } + /* + * Notify qib_destroy_qp() if it is waiting + * for lookaside_qp to finish. + */ + if (rcd->lookaside_qp) { + if (atomic_dec_and_test(&rcd->lookaside_qp->refcount)) + wake_up(&rcd->lookaside_qp->wait); + rcd->lookaside_qp = NULL; + } rcd->head = l; - rcd->pkt_count += i; /* * Iterate over all QPs waiting to respond. @@ -444,7 +593,7 @@ bail: * if no packets were processed. */ lval = (u64)rcd->head | dd->rhdrhead_intr_off; - dd->f_update_usrhead(rcd, lval, updegr, etail); + dd->f_update_usrhead(rcd, lval, updegr, etail, i); return crcs; } @@ -616,8 +765,9 @@ int qib_reset_device(int unit) qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit); if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) { - qib_devinfo(dd->pcidev, "Invalid unit number %u or " - "not initialized or not present\n", unit); + qib_devinfo(dd->pcidev, + "Invalid unit number %u or not initialized or not present\n", + unit); ret = -ENXIO; goto bail; } @@ -654,11 +804,13 @@ int qib_reset_device(int unit) else ret = -EAGAIN; if (ret) - qib_dev_err(dd, "Reinitialize unit %u after " - "reset failed with %d\n", unit, ret); + qib_dev_err(dd, + "Reinitialize unit %u after reset failed with %d\n", + unit, ret); else - qib_devinfo(dd->pcidev, "Reinitialized unit %u after " - "resetting\n", unit); + qib_devinfo(dd->pcidev, + "Reinitialized unit %u after resetting\n", + unit); bail: return ret; diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c index 92d9cfe98a6..4d5d71aaa2b 100644 --- a/drivers/infiniband/hw/qib/qib_eeprom.c +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -160,10 +161,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) if (oguid > bguid[7]) { if (bguid[6] == 0xff) { if (bguid[5] == 0xff) { - qib_dev_err(dd, "Can't set %s GUID" - " from base, wraps to" - " OUI!\n", - qib_get_unit_name(t)); + qib_dev_err(dd, + "Can't set %s GUID from base, wraps to OUI!\n", + qib_get_unit_name(t)); dd->base_guid = 0; goto bail; } @@ -182,8 +182,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) len = sizeof(struct qib_flash); buf = vmalloc(len); if (!buf) { - qib_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for GUID\n", len); + qib_dev_err(dd, + "Couldn't allocate memory to read %u bytes from eeprom for GUID\n", + len); goto bail; } @@ -201,23 +202,25 @@ void qib_get_eeprom_info(struct qib_devdata *dd) csum = flash_csum(ifp, 0); if (csum != ifp->if_csum) { - qib_devinfo(dd->pcidev, "Bad I2C flash checksum: " - "0x%x, not 0x%x\n", csum, ifp->if_csum); + qib_devinfo(dd->pcidev, + "Bad I2C flash checksum: 0x%x, not 0x%x\n", + csum, ifp->if_csum); goto done; } if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { - qib_dev_err(dd, "Invalid GUID %llx from flash; ignoring\n", - *(unsigned long long *) ifp->if_guid); + qib_dev_err(dd, + "Invalid GUID %llx from flash; ignoring\n", + *(unsigned long long *) ifp->if_guid); /* don't allow GUID if all 0 or all 1's */ goto done; } /* complain, but allow it */ if (*(u64 *) ifp->if_guid == 0x100007511000000ULL) - qib_devinfo(dd->pcidev, "Warning, GUID %llx is " - "default, probably not correct!\n", - *(unsigned long long *) ifp->if_guid); + qib_devinfo(dd->pcidev, + "Warning, GUID %llx is default, probably not correct!\n", + *(unsigned long long *) ifp->if_guid); bguid = ifp->if_guid; if (!bguid[0] && !bguid[1] && !bguid[2]) { @@ -260,8 +263,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) memcpy(dd->serial, ifp->if_serial, sizeof ifp->if_serial); if (!strstr(ifp->if_comment, "Tested successfully")) - qib_dev_err(dd, "Board SN %s did not pass functional " - "test: %s\n", dd->serial, ifp->if_comment); + qib_dev_err(dd, + "Board SN %s did not pass functional test: %s\n", + dd->serial, ifp->if_comment); memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT); /* @@ -323,8 +327,9 @@ int qib_update_eeprom_log(struct qib_devdata *dd) buf = vmalloc(len); ret = 1; if (!buf) { - qib_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for logging\n", len); + qib_dev_err(dd, + "Couldn't allocate memory to read %u bytes from eeprom for logging\n", + len); goto bail; } diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 79d9971aff1..b15e34eeef6 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -39,15 +39,19 @@ #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/io.h> -#include <linux/uio.h> +#include <linux/aio.h> #include <linux/jiffies.h> #include <asm/pgtable.h> #include <linux/delay.h> +#include <linux/export.h> #include "qib.h" #include "qib_common.h" #include "qib_user_sdma.h" +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + static int qib_open(struct inode *, struct file *); static int qib_close(struct inode *, struct file *); static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *); @@ -314,8 +318,9 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, } if (cnt > tidcnt) { /* make sure it all fits in tid_pg_list */ - qib_devinfo(dd->pcidev, "Process tried to allocate %u " - "TIDs, only trying max (%u)\n", cnt, tidcnt); + qib_devinfo(dd->pcidev, + "Process tried to allocate %u TIDs, only trying max (%u)\n", + cnt, tidcnt); cnt = tidcnt; } pagep = (struct page **) rcd->tid_pg_list; @@ -749,9 +754,9 @@ static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd, ret = remap_pfn_range(vma, vma->vm_start, pfn, len, vma->vm_page_prot); if (ret) - qib_devinfo(dd->pcidev, "%s ctxt%u mmap of %lx, %x " - "bytes failed: %d\n", what, rcd->ctxt, - pfn, len, ret); + qib_devinfo(dd->pcidev, + "%s ctxt%u mmap of %lx, %x bytes failed: %d\n", + what, rcd->ctxt, pfn, len, ret); bail: return ret; } @@ -770,8 +775,9 @@ static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd, */ sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE; if ((vma->vm_end - vma->vm_start) > sz) { - qib_devinfo(dd->pcidev, "FAIL mmap userreg: reqlen " - "%lx > PAGE\n", vma->vm_end - vma->vm_start); + qib_devinfo(dd->pcidev, + "FAIL mmap userreg: reqlen %lx > PAGE\n", + vma->vm_end - vma->vm_start); ret = -EFAULT; } else { phys = dd->physaddr + ureg; @@ -801,8 +807,8 @@ static int mmap_piobufs(struct vm_area_struct *vma, * for it. */ if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) { - qib_devinfo(dd->pcidev, "FAIL mmap piobufs: " - "reqlen %lx > PAGE\n", + qib_devinfo(dd->pcidev, + "FAIL mmap piobufs: reqlen %lx > PAGE\n", vma->vm_end - vma->vm_start); ret = -EINVAL; goto bail; @@ -846,8 +852,8 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, size = rcd->rcvegrbuf_size; total_size = rcd->rcvegrbuf_chunks * size; if ((vma->vm_end - vma->vm_start) > total_size) { - qib_devinfo(dd->pcidev, "FAIL on egr bufs: " - "reqlen %lx > actual %lx\n", + qib_devinfo(dd->pcidev, + "FAIL on egr bufs: reqlen %lx > actual %lx\n", vma->vm_end - vma->vm_start, (unsigned long) total_size); ret = -EINVAL; @@ -855,8 +861,9 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, } if (vma->vm_flags & VM_WRITE) { - qib_devinfo(dd->pcidev, "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); + qib_devinfo(dd->pcidev, + "Can't map eager buffers as writable (flags=%lx)\n", + vma->vm_flags); ret = -EPERM; goto bail; } @@ -964,7 +971,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &qib_file_vm_ops; - vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; ret = 1; bail: @@ -1148,6 +1155,49 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt) return pollflag; } +static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) +{ + struct qib_filedata *fd = fp->private_data; + const unsigned int weight = cpumask_weight(¤t->cpus_allowed); + const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); + int local_cpu; + + /* + * If process has NOT already set it's affinity, select and + * reserve a processor for it on the local NUMA node. + */ + if ((weight >= qib_cpulist_count) && + (cpumask_weight(local_mask) <= qib_cpulist_count)) { + for_each_cpu(local_cpu, local_mask) + if (!test_and_set_bit(local_cpu, qib_cpulist)) { + fd->rec_cpu_num = local_cpu; + return; + } + } + + /* + * If process has NOT already set it's affinity, select and + * reserve a processor for it, as a rendevous for all + * users of the driver. If they don't actually later + * set affinity to this cpu, or set it to some other cpu, + * it just means that sooner or later we don't recommend + * a cpu, and let the scheduler do it's best. + */ + if (weight >= qib_cpulist_count) { + int cpu; + cpu = find_first_zero_bit(qib_cpulist, + qib_cpulist_count); + if (cpu == qib_cpulist_count) + qib_dev_err(dd, + "no cpus avail for affinity PID %u\n", + current->pid); + else { + __set_bit(cpu, qib_cpulist); + fd->rec_cpu_num = cpu; + } + } +} + /* * Check that userland and driver are compatible for subcontexts. */ @@ -1170,7 +1220,7 @@ static int qib_compatible_subctxts(int user_swmajor, int user_swminor) return user_swminor == 3; default: /* >= 4 are compatible (or are expected to be) */ - return user_swminor >= 4; + return user_swminor <= QIB_USER_SWMINOR; } } /* make no promises yet for future major versions */ @@ -1252,12 +1302,20 @@ bail: static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, struct file *fp, const struct qib_user_info *uinfo) { + struct qib_filedata *fd = fp->private_data; struct qib_devdata *dd = ppd->dd; struct qib_ctxtdata *rcd; void *ptmp = NULL; int ret; + int numa_id; + + assign_ctxt_affinity(fp, dd); - rcd = qib_create_ctxtdata(ppd, ctxt); + numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ? + cpu_to_node(fd->rec_cpu_num) : + numa_node_id()) : dd->assigned_node_id; + + rcd = qib_create_ctxtdata(ppd, ctxt, numa_id); /* * Allocate memory for use in qib_tid_update() at open to @@ -1269,8 +1327,8 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, GFP_KERNEL); if (!rcd || !ptmp) { - qib_dev_err(dd, "Unable to allocate ctxtdata " - "memory, failing open\n"); + qib_dev_err(dd, + "Unable to allocate ctxtdata memory, failing open\n"); ret = -ENOMEM; goto bailerr; } @@ -1284,10 +1342,14 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, strlcpy(rcd->comm, current->comm, sizeof(rcd->comm)); ctxt_fp(fp) = rcd; qib_stats.sps_ctxts++; + dd->freectxts--; ret = 0; goto bail; bailerr: + if (fd->rec_cpu_num != -1) + __clear_bit(fd->rec_cpu_num, qib_cpulist); + dd->rcd[ctxt] = NULL; kfree(rcd); kfree(ptmp); @@ -1379,17 +1441,17 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo, /* find device (with ACTIVE ports) with fewest ctxts in use */ for (ndev = 0; ndev < devmax; ndev++) { struct qib_devdata *dd = qib_lookup(ndev); - unsigned cused = 0, cfree = 0; + unsigned cused = 0, cfree = 0, pusable = 0; if (!dd) continue; if (port && port <= dd->num_pports && usable(dd->pport + port - 1)) - dusable = 1; + pusable = 1; else for (i = 0; i < dd->num_pports; i++) if (usable(dd->pport + i)) - dusable++; - if (!dusable) + pusable++; + if (!pusable) continue; for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) @@ -1477,6 +1539,58 @@ static int qib_open(struct inode *in, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } +static int find_hca(unsigned int cpu, int *unit) +{ + int ret = 0, devmax, npresent, nup, ndev; + + *unit = -1; + + devmax = qib_count_units(&npresent, &nup); + if (!npresent) { + ret = -ENXIO; + goto done; + } + if (!nup) { + ret = -ENETDOWN; + goto done; + } + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + if (dd) { + if (pcibus_to_node(dd->pcidev->bus) < 0) { + ret = -EINVAL; + goto done; + } + if (cpu_to_node(cpu) == + pcibus_to_node(dd->pcidev->bus)) { + *unit = ndev; + goto done; + } + } + } +done: + return ret; +} + +static int do_qib_user_sdma_queue_create(struct file *fp) +{ + struct qib_filedata *fd = fp->private_data; + struct qib_ctxtdata *rcd = fd->rcd; + struct qib_devdata *dd = rcd->dd; + + if (dd->flags & QIB_HAS_SEND_DMA) { + + fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, + dd->unit, + rcd->ctxt, + fd->subctxt); + if (!fd->pq) + return -ENOMEM; + } + + return 0; +} + /* * Get ctxt early, so can set affinity prior to memory allocation. */ @@ -1509,60 +1623,36 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) if (qib_compatible_subctxts(swmajor, swminor) && uinfo->spu_subctxt_cnt) { ret = find_shared_ctxt(fp, uinfo); - if (ret) { - if (ret > 0) - ret = 0; - goto done_chk_sdma; + if (ret > 0) { + ret = do_qib_user_sdma_queue_create(fp); + if (!ret) + assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd); + goto done_ok; } } - i_minor = iminor(fp->f_dentry->d_inode) - QIB_USER_MINOR_BASE; + i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; if (i_minor) ret = find_free_ctxt(i_minor - 1, fp, uinfo); - else + else { + int unit; + const unsigned int cpu = cpumask_first(¤t->cpus_allowed); + const unsigned int weight = + cpumask_weight(¤t->cpus_allowed); + + if (weight == 1 && !test_bit(cpu, qib_cpulist)) + if (!find_hca(cpu, &unit) && unit >= 0) + if (!find_free_ctxt(unit, fp, uinfo)) { + ret = 0; + goto done_chk_sdma; + } ret = get_a_ctxt(fp, uinfo, alg); - -done_chk_sdma: - if (!ret) { - struct qib_filedata *fd = fp->private_data; - const struct qib_ctxtdata *rcd = fd->rcd; - const struct qib_devdata *dd = rcd->dd; - - if (dd->flags & QIB_HAS_SEND_DMA) { - fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, - dd->unit, - rcd->ctxt, - fd->subctxt); - if (!fd->pq) - ret = -ENOMEM; - } - - /* - * If process has NOT already set it's affinity, select and - * reserve a processor for it, as a rendevous for all - * users of the driver. If they don't actually later - * set affinity to this cpu, or set it to some other cpu, - * it just means that sooner or later we don't recommend - * a cpu, and let the scheduler do it's best. - */ - if (!ret && cpus_weight(current->cpus_allowed) >= - qib_cpulist_count) { - int cpu; - cpu = find_first_zero_bit(qib_cpulist, - qib_cpulist_count); - if (cpu != qib_cpulist_count) { - __set_bit(cpu, qib_cpulist); - fd->rec_cpu_num = cpu; - } - } else if (cpus_weight(current->cpus_allowed) == 1 && - test_bit(first_cpu(current->cpus_allowed), - qib_cpulist)) - qib_devinfo(dd->pcidev, "%s PID %u affinity " - "set to cpu %d; already allocated\n", - current->comm, current->pid, - first_cpu(current->cpus_allowed)); } +done_chk_sdma: + if (!ret) + ret = do_qib_user_sdma_queue_create(fp); +done_ok: mutex_unlock(&qib_mutex); done: @@ -1657,7 +1747,7 @@ static int qib_do_user_init(struct file *fp, * 0 to 1. So for those chips, we turn it off and then back on. * This will (very briefly) affect any other open ctxts, but the * duration is very short, and therefore isn't an issue. We - * explictly set the in-memory tail copy to 0 beforehand, so we + * explicitly set the in-memory tail copy to 0 beforehand, so we * don't have to wait to be sure the DMA update has happened * (chip resets head/tail to 0 on transition to enable). */ @@ -1791,6 +1881,7 @@ static int qib_close(struct inode *in, struct file *fp) if (dd->pageshadow) unlock_expected_tids(rcd); qib_stats.sps_ctxts--; + dd->freectxts++; } mutex_unlock(&qib_mutex); @@ -1904,8 +1995,9 @@ int qib_set_uevent_bits(struct qib_pportdata *ppd, const int evtbit) struct qib_ctxtdata *rcd; unsigned ctxt; int ret = 0; + unsigned long flags; - spin_lock(&ppd->dd->uctxt_lock); + spin_lock_irqsave(&ppd->dd->uctxt_lock, flags); for (ctxt = ppd->dd->first_user_ctxt; ctxt < ppd->dd->cfgctxts; ctxt++) { rcd = ppd->dd->rcd[ctxt]; @@ -1924,7 +2016,7 @@ int qib_set_uevent_bits(struct qib_pportdata *ppd, const int evtbit) ret = 1; break; } - spin_unlock(&ppd->dd->uctxt_lock); + spin_unlock_irqrestore(&ppd->dd->uctxt_lock, flags); return ret; } @@ -2180,8 +2272,7 @@ int qib_cdev_init(int minor, const char *name, cdev = cdev_alloc(); if (!cdev) { - printk(KERN_ERR QIB_DRV_NAME - ": Could not allocate cdev for minor %d, %s\n", + pr_err("Could not allocate cdev for minor %d, %s\n", minor, name); ret = -ENOMEM; goto done; @@ -2193,19 +2284,17 @@ int qib_cdev_init(int minor, const char *name, ret = cdev_add(cdev, dev, 1); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME - ": Could not add cdev for minor %d, %s (err %d)\n", + pr_err("Could not add cdev for minor %d, %s (err %d)\n", minor, name, -ret); goto err_cdev; } - device = device_create(qib_class, NULL, dev, NULL, name); + device = device_create(qib_class, NULL, dev, NULL, "%s", name); if (!IS_ERR(device)) goto done; ret = PTR_ERR(device); device = NULL; - printk(KERN_ERR QIB_DRV_NAME ": Could not create " - "device for minor %d, %s (err %d)\n", + pr_err("Could not create device for minor %d, %s (err %d)\n", minor, name, -ret); err_cdev: cdev_del(cdev); @@ -2240,16 +2329,14 @@ int __init qib_dev_init(void) ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME ": Could not allocate " - "chrdev region (err %d)\n", -ret); + pr_err("Could not allocate chrdev region (err %d)\n", -ret); goto done; } qib_class = class_create(THIS_MODULE, "ipath"); if (IS_ERR(qib_class)) { ret = PTR_ERR(qib_class); - printk(KERN_ERR QIB_DRV_NAME ": Could not create " - "device class (err %d)\n", -ret); + pr_err("Could not create device class (err %d)\n", -ret); unregister_chrdev_region(qib_dev, QIB_NMINORS); } diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index f99bddc0171..cab610ccd50 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -44,10 +45,10 @@ static struct super_block *qib_super; -#define private2dd(file) ((file)->f_dentry->d_inode->i_private) +#define private2dd(file) (file_inode(file)->i_private) static int qibfs_mknod(struct inode *dir, struct dentry *dentry, - int mode, const struct file_operations *fops, + umode_t mode, const struct file_operations *fops, void *data) { int error; @@ -60,14 +61,14 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_ino = get_next_ino(); inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; inode->i_blocks = 0; inode->i_atime = CURRENT_TIME; inode->i_mtime = inode->i_atime; inode->i_ctime = inode->i_atime; inode->i_private = data; - if ((mode & S_IFMT) == S_IFDIR) { + if (S_ISDIR(mode)) { inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); inc_nlink(dir); @@ -82,7 +83,7 @@ bail: return error; } -static int create_file(const char *name, mode_t mode, +static int create_file(const char *name, umode_t mode, struct dentry *parent, struct dentry **dentry, const struct file_operations *fops, void *data) { @@ -104,6 +105,7 @@ static int create_file(const char *name, mode_t mode, static ssize_t driver_stats_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + qib_stats.sps_ints = qib_sps_ints(); return simple_read_from_buffer(buf, count, ppos, &qib_stats, sizeof qib_stats); } @@ -170,7 +172,7 @@ static const struct file_operations cntr_ops[] = { }; /* - * Could use file->f_dentry->d_inode->i_ino to figure out which file, + * Could use file_inode(file)->i_ino to figure out which file, * instead of separate routine for each, but for now, this works... */ @@ -382,7 +384,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, &simple_dir_operations, dd); if (ret) { - printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); + pr_err("create_file(%s) failed: %d\n", unit, ret); goto bail; } @@ -390,21 +392,21 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file("counters", S_IFREG|S_IRUGO, dir, &tmp, &cntr_ops[0], dd); if (ret) { - printk(KERN_ERR "create_file(%s/counters) failed: %d\n", + pr_err("create_file(%s/counters) failed: %d\n", unit, ret); goto bail; } ret = create_file("counter_names", S_IFREG|S_IRUGO, dir, &tmp, &cntr_ops[1], dd); if (ret) { - printk(KERN_ERR "create_file(%s/counter_names) failed: %d\n", + pr_err("create_file(%s/counter_names) failed: %d\n", unit, ret); goto bail; } ret = create_file("portcounter_names", S_IFREG|S_IRUGO, dir, &tmp, &portcntr_ops[0], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, "portcounter_names", ret); goto bail; } @@ -416,7 +418,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, &portcntr_ops[i], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, fname, ret); goto bail; } @@ -426,7 +428,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, &qsfp_ops[i - 1], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, fname, ret); goto bail; } @@ -435,7 +437,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, &flash_ops, dd); if (ret) - printk(KERN_ERR "create_file(%s/flash) failed: %d\n", + pr_err("create_file(%s/flash) failed: %d\n", unit, ret); bail: return ret; @@ -453,18 +455,15 @@ static int remove_file(struct dentry *parent, char *name) goto bail; } - spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, tmp); } else { spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); } + dput(tmp); ret = 0; bail: @@ -489,10 +488,11 @@ static int remove_device_files(struct super_block *sb, if (IS_ERR(dir)) { ret = PTR_ERR(dir); - printk(KERN_ERR "Lookup of %s failed\n", unit); + pr_err("Lookup of %s failed\n", unit); goto bail; } + mutex_lock(&dir->d_inode->i_mutex); remove_file(dir, "counters"); remove_file(dir, "counter_names"); remove_file(dir, "portcounter_names"); @@ -507,8 +507,10 @@ static int remove_device_files(struct super_block *sb, } } remove_file(dir, "flash"); - d_delete(dir); + mutex_unlock(&dir->d_inode->i_mutex); ret = simple_rmdir(root->d_inode, dir); + d_delete(dir); + dput(dir); bail: mutex_unlock(&root->d_inode->i_mutex); @@ -535,7 +537,7 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent) ret = simple_fill_super(sb, QIBFS_MAGIC, files); if (ret) { - printk(KERN_ERR "simple_fill_super failed: %d\n", ret); + pr_err("simple_fill_super failed: %d\n", ret); goto bail; } @@ -606,6 +608,7 @@ static struct file_system_type qibfs_fs_type = { .mount = qibfs_mount, .kill_sb = qibfs_kill_super, }; +MODULE_ALIAS_FS("ipathfs"); int __init qib_init_qibfs(void) { diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index a5e29dbb953..d68266ac761 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -51,7 +52,7 @@ static u32 qib_6120_iblink_state(u64); /* * This file contains all the chip-specific register information and - * access functions for the QLogic QLogic_IB PCI-Express chip. + * access functions for the Intel Intel_IB PCI-Express chip. * */ @@ -753,8 +754,8 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) return; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); return; } qib_stats.sps_hwerrs++; @@ -779,13 +780,14 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, * or it's occurred within the last 5 seconds. */ if (hwerrs & ~(TXE_PIO_PARITY | RXEMEMPARITYERR_EAGERTID)) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); if (hwerrs & ~IB_HWE_BITSEXTANT) - qib_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~IB_HWE_BITSEXTANT)); + qib_dev_err(dd, + "hwerror interrupt with unknown errors %llx set\n", + (unsigned long long)(hwerrs & ~IB_HWE_BITSEXTANT)); ctrl = qib_read_kreg32(dd, kr_control); if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) { @@ -815,8 +817,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcat(msg, "[Memory BIST test failed, InfiniPath hardware" - " unusable]", msgl); + strlcat(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); @@ -868,8 +871,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, *msg = 0; /* recovered from all of them */ if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * for /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. @@ -1017,9 +1021,9 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs) qib_inc_eeprom_err(dd, log_idx, 1); if (errs & ~IB_E_BITSEXTANT) - qib_dev_err(dd, "error interrupt with unknown errors " - "%llx set\n", - (unsigned long long) (errs & ~IB_E_BITSEXTANT)); + qib_dev_err(dd, + "error interrupt with unknown errors %llx set\n", + (unsigned long long) (errs & ~IB_E_BITSEXTANT)); if (errs & E_SUM_ERRS) { qib_disarm_6120_senderrbufs(ppd); @@ -1089,8 +1093,8 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs) } if (errs & ERR_MASK(ResetNegated)) { - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1541,8 +1545,9 @@ static noinline void unlikely_6120_intr(struct qib_devdata *dd, u64 istat) qib_stats.sps_errints++; estat = qib_read_kreg64(dd, kr_errstatus); if (!estat) - qib_devinfo(dd->pcidev, "error interrupt (%Lx), " - "but no error bits set!\n", istat); + qib_devinfo(dd->pcidev, + "error interrupt (%Lx), but no error bits set!\n", + istat); handle_6120_errors(dd, estat); } @@ -1629,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) @@ -1715,16 +1718,16 @@ static void qib_setup_6120_interrupt(struct qib_devdata *dd) } if (!dd->cspec->irq) - qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " - "work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); else { int ret; ret = request_irq(dd->cspec->irq, qib_6120intr, 0, QIB_DRV_NAME, dd); if (ret) - qib_dev_err(dd, "Couldn't setup interrupt " - "(irq=%d): %d\n", dd->cspec->irq, - ret); + qib_dev_err(dd, + "Couldn't setup interrupt (irq=%d): %d\n", + dd->cspec->irq, ret); } } @@ -1759,8 +1762,9 @@ static void pe_boardname(struct qib_devdata *dd) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2) - qib_dev_err(dd, "Unsupported InfiniPath hardware revision " - "%u.%u!\n", dd->majrev, dd->minrev); + qib_dev_err(dd, + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", @@ -1799,10 +1803,11 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) /* * Keep chip from being accessed until we are ready. Use * writeq() directly, to allow the write even though QIB_PRESENT - * isnt' set. + * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler re-ordering around actual reset */ @@ -1833,8 +1838,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) bail: if (ret) { if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) - qib_dev_err(dd, "Reset failed to setup PCIe or " - "interrupts; continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); /* clear the reset error, init error/hwerror mask */ qib_6120_init_hwerrors(dd); /* for Rev2 error interrupts; nop for rev 1 */ @@ -1876,8 +1881,9 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, } pa >>= 11; if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -1941,8 +1947,9 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, } pa >>= 11; if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -2074,11 +2081,13 @@ static void qib_6120_config_ctxts(struct qib_devdata *dd) } static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd, - u32 updegr, u32 egrhd) + u32 updegr, u32 egrhd, u32 npkts) { - qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); if (updegr) qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); + mmiowb(); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + mmiowb(); } static u32 qib_6120_hdrqempty(struct qib_ctxtdata *rcd) @@ -2103,7 +2112,7 @@ static void alloc_dummy_hdrq(struct qib_devdata *dd) dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev, dd->rcd[0]->rcvhdrq_size, &dd->cspec->dummy_hdrq_phys, - GFP_KERNEL | __GFP_COMP); + GFP_ATOMIC | __GFP_COMP); if (!dd->cspec->dummy_hdrq) { qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n"); /* fallback to just 0'ing */ @@ -2171,7 +2180,7 @@ static void rcvctrl_6120_mod(struct qib_pportdata *ppd, unsigned int op, * Init the context registers also; if we were * disabled, tail and head should both be zero * already from the enable, but since we don't - * know, we have to do it explictly. + * know, we have to do it explicitly. */ val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt); qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt); @@ -2926,8 +2935,9 @@ static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what) ppd->dd->unit, ppd->port); } else if (!strncmp(what, "off", 3)) { ppd->dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -3130,6 +3140,7 @@ static void get_6120_chip_params(struct qib_devdata *dd) val = qib_read_kreg64(dd, kr_sendpiobufcnt); dd->piobcnt2k = val & ~0U; dd->piobcnt4k = val >> 32; + dd->last_pio = dd->piobcnt4k + dd->piobcnt2k - 1; /* these may be adjusted in init_chip_wc_pat() */ dd->pio2kbase = (u32 __iomem *) (((char __iomem *)dd->kregbase) + dd->pio2k_bufbase); @@ -3183,11 +3194,10 @@ static int qib_late_6120_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } return ret; @@ -3215,8 +3225,8 @@ static int init_6120_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ -3255,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd) dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR; ppd->link_width_enabled = IB_WIDTH_4X; @@ -3273,6 +3285,8 @@ static int init_6120_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_6120_tidtemplate(dd); @@ -3451,6 +3465,13 @@ static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum) return -ENXIO; } +#ifdef CONFIG_INFINIBAND_QIB_DCA +static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + return 0; +} +#endif + /* Dummy function, as 6120 boards never disable EEPROM Write */ static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen) { @@ -3526,6 +3547,9 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, dd->f_xgxs_reset = qib_6120_xgxs_reset; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_6120_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_6120_notify_dca; +#endif /* * Do remaining pcie setup and save pcie values in dd. * Any error printing is already done by the init code. @@ -3546,8 +3570,8 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, goto bail; if (qib_pcie_params(dd, 8, NULL, NULL)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); dd->cspec->irq = pdev->irq; /* save IRQ */ /* clear diagctrl register, in case diags were running and crashed */ diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 6fd8d74e739..7dec89fdc12 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -39,6 +39,7 @@ #include <linux/interrupt.h> #include <linux/pci.h> #include <linux/delay.h> +#include <linux/module.h> #include <linux/io.h> #include <rdma/ib_verbs.h> @@ -1050,7 +1051,7 @@ static void reenable_7220_chase(unsigned long opaque) static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst) { u8 ibclt; - u64 tnow; + unsigned long tnow; ibclt = (u8)SYM_FIELD(ibcst, IBCStatus, LinkTrainingState); @@ -1065,9 +1066,9 @@ static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst) case IB_7220_LT_STATE_CFGWAITRMT: case IB_7220_LT_STATE_TXREVLANES: case IB_7220_LT_STATE_CFGENH: - tnow = get_jiffies_64(); + tnow = jiffies; if (ppd->cpspec->chase_end && - time_after64(tnow, ppd->cpspec->chase_end)) { + time_after(tnow, ppd->cpspec->chase_end)) { ppd->cpspec->chase_end = 0; qib_set_ib_7220_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN, @@ -1110,9 +1111,9 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs) sdma_7220_errors(ppd, errs); if (errs & ~IB_E_BITSEXTANT) - qib_dev_err(dd, "error interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (errs & ~IB_E_BITSEXTANT)); + qib_dev_err(dd, + "error interrupt with unknown errors %llx set\n", + (unsigned long long) (errs & ~IB_E_BITSEXTANT)); if (errs & E_SUM_ERRS) { qib_disarm_7220_senderrbufs(ppd); @@ -1191,8 +1192,8 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs) } if (errs & ERR_MASK(ResetNegated)) { - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1304,8 +1305,8 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) goto bail; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); goto bail; } qib_stats.sps_hwerrs++; @@ -1328,13 +1329,14 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_inc_eeprom_err(dd, log_idx, 1); if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC | RXE_PARITY)) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); if (hwerrs & ~IB_HWE_BITSEXTANT) - qib_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~IB_HWE_BITSEXTANT)); + qib_dev_err(dd, + "hwerror interrupt with unknown errors %llx set\n", + (unsigned long long) (hwerrs & ~IB_HWE_BITSEXTANT)); if (hwerrs & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) qib_sd7220_clr_ibpar(dd); @@ -1361,8 +1363,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcat(msg, "[Memory BIST test failed, " - "InfiniPath hardware unusable]", msgl); + strlcat(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); @@ -1408,8 +1411,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * For /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. @@ -1692,8 +1696,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd) ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; spin_unlock_irqrestore(&ppd->lflags_lock, flags); wake_up(&ppd->cpspec->autoneg_wait); - cancel_delayed_work(&ppd->cpspec->autoneg_work); - flush_scheduled_work(); + cancel_delayed_work_sync(&ppd->cpspec->autoneg_work); shutdown_7220_relock_poll(ppd->dd); val = qib_read_kreg64(ppd->dd, kr_xgxs_cfg); @@ -1918,8 +1921,9 @@ static noinline void unlikely_7220_intr(struct qib_devdata *dd, u64 istat) qib_stats.sps_errints++; estat = qib_read_kreg64(dd, kr_errstatus); if (!estat) - qib_devinfo(dd->pcidev, "error interrupt (%Lx), " - "but no error bits set!\n", istat); + qib_devinfo(dd->pcidev, + "error interrupt (%Lx), but no error bits set!\n", + istat); else handle_7220_errors(dd, estat); } @@ -1958,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; - + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) unlikely_7220_intr(dd, istat); @@ -2023,17 +2024,18 @@ bail: static void qib_setup_7220_interrupt(struct qib_devdata *dd) { if (!dd->cspec->irq) - qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " - "work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); else { int ret = request_irq(dd->cspec->irq, qib_7220intr, dd->msi_lo ? 0 : IRQF_SHARED, QIB_DRV_NAME, dd); if (ret) - qib_dev_err(dd, "Couldn't setup %s interrupt " - "(irq=%d): %d\n", dd->msi_lo ? - "MSI" : "INTx", dd->cspec->irq, ret); + qib_dev_err(dd, + "Couldn't setup %s interrupt (irq=%d): %d\n", + dd->msi_lo ? "MSI" : "INTx", + dd->cspec->irq, ret); } } @@ -2072,9 +2074,9 @@ static void qib_7220_boardname(struct qib_devdata *dd) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2) - qib_dev_err(dd, "Unsupported InfiniPath hardware " - "revision %u.%u!\n", - dd->majrev, dd->minrev); + qib_dev_err(dd, + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", @@ -2112,10 +2114,11 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) /* * Keep chip from being accessed until we are ready. Use * writeq() directly, to allow the write even though QIB_PRESENT - * isnt' set. + * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler reordering around actual reset */ @@ -2146,8 +2149,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) bail: if (ret) { if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) - qib_dev_err(dd, "Reset failed to setup PCIe or " - "interrupts; continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); /* hold IBC in reset, no sends, etc till later */ qib_write_kreg(dd, kr_control, 0ULL); @@ -2187,8 +2190,9 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, return; } if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -2297,7 +2301,7 @@ static void qib_7220_config_ctxts(struct qib_devdata *dd) nchipctxts = qib_read_kreg32(dd, kr_portcnt); dd->cspec->numctxts = nchipctxts; if (qib_n_krcv_queues > 1) { - dd->qpn_mask = 0x3f; + dd->qpn_mask = 0x3e; dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports; if (dd->first_user_ctxt > nchipctxts) dd->first_user_ctxt = nchipctxts; @@ -2435,6 +2439,7 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) int lsb, ret = 0, setforce = 0; u16 lcmd, licmd; unsigned long flags; + u32 tmp = 0; switch (which) { case QIB_IB_CFG_LIDLMC: @@ -2468,9 +2473,6 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) maskr = IBA7220_IBC_WIDTH_MASK; lsb = IBA7220_IBC_WIDTH_SHIFT; setforce = 1; - spin_lock_irqsave(&ppd->lflags_lock, flags); - ppd->lflags |= QIBL_IB_FORCE_NOTIFY; - spin_unlock_irqrestore(&ppd->lflags_lock, flags); break; case QIB_IB_CFG_SPD_ENB: /* set allowed Link speeds */ @@ -2480,7 +2482,7 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) * we command the link down. As with width, only write the * actual register if the link is currently down, otherwise * takes effect on next link change. Since setting is being - * explictly requested (via MAD or sysfs), clear autoneg + * explicitly requested (via MAD or sysfs), clear autoneg * failure status if speed autoneg is enabled. */ ppd->link_speed_enabled = val; @@ -2644,6 +2646,28 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) goto bail; } qib_set_ib_7220_lstate(ppd, lcmd, licmd); + + maskr = IBA7220_IBC_WIDTH_MASK; + lsb = IBA7220_IBC_WIDTH_SHIFT; + tmp = (ppd->cpspec->ibcddrctrl >> lsb) & maskr; + /* If the width active on the chip does not match the + * width in the shadow register, write the new active + * width to the chip. + * We don't have to worry about speed as the speed is taken + * care of by set_7220_ibspeed_fast called by ib_updown. + */ + if (ppd->link_width_enabled-1 != tmp) { + ppd->cpspec->ibcddrctrl &= ~(maskr << lsb); + ppd->cpspec->ibcddrctrl |= + (((u64)(ppd->link_width_enabled-1) & maskr) << + lsb); + qib_write_kreg(dd, kr_ibcddrctrl, + ppd->cpspec->ibcddrctrl); + qib_write_kreg(dd, kr_scratch, 0); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_FORCE_NOTIFY; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } goto bail; case QIB_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */ @@ -2686,8 +2710,9 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what) ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); /* enable heart beat again */ val = IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT; - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -2703,11 +2728,13 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what) } static void qib_update_7220_usrhead(struct qib_ctxtdata *rcd, u64 hd, - u32 updegr, u32 egrhd) + u32 updegr, u32 egrhd, u32 npkts) { - qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); if (updegr) qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); + mmiowb(); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + mmiowb(); } static u32 qib_7220_hdrqempty(struct qib_ctxtdata *rcd) @@ -2779,7 +2806,7 @@ static void rcvctrl_7220_mod(struct qib_pportdata *ppd, unsigned int op, * Init the context registers also; if we were * disabled, tail and head should both be zero * already from the enable, but since we don't - * know, we have to do it explictly. + * know, we have to do it explicitly. */ val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt); qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt); @@ -3285,8 +3312,8 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd) if (!dd->msi_lo) return 0; - qib_devinfo(dd->pcidev, "MSI interrupt not detected," - " trying INTx interrupts\n"); + qib_devinfo(dd->pcidev, + "MSI interrupt not detected, trying INTx interrupts\n"); qib_7220_free_irq(dd); qib_enable_intx(dd->pcidev); /* @@ -3515,8 +3542,8 @@ static void try_7220_autoneg(struct qib_pportdata *ppd) toggle_7220_rclkrls(ppd->dd); /* 2 msec is minimum length of a poll cycle */ - schedule_delayed_work(&ppd->cpspec->autoneg_work, - msecs_to_jiffies(2)); + queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work, + msecs_to_jiffies(2)); } /* @@ -3958,11 +3985,10 @@ static int qib_late_7220_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } qib_register_observer(dd, &sendctrl_observer); @@ -3992,8 +4018,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ -4033,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd) init_waitqueue_head(&cpspec->autoneg_wait); INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR; @@ -4066,6 +4094,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7220_tidtemplate(dd); @@ -4133,6 +4163,7 @@ static int qib_init_7220_variables(struct qib_devdata *dd) dd->cspec->sdmabufcnt; dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs; dd->cspec->lastbuf_for_pio--; /* range is <= , not < */ + dd->last_pio = dd->cspec->lastbuf_for_pio; dd->pbufsctxt = dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt); @@ -4482,6 +4513,13 @@ bail: return ret; } +#ifdef CONFIG_INFINIBAND_QIB_DCA +static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + return 0; +} +#endif + /* Dummy function, as 7220 boards never disable EEPROM Write */ static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) { @@ -4556,6 +4594,9 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, dd->f_xgxs_reset = qib_7220_xgxs_reset; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_7220_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7220_notify_dca; +#endif /* * Do remaining pcie setup and save pcie values in dd. * Any error printing is already done by the init code. @@ -4588,8 +4629,8 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, break; } if (qib_pcie_params(dd, minwidth, NULL, NULL)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); /* save IRQ for possible later use */ dd->cspec->irq = pdev->irq; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 584d443b533..a7eb32517a0 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -40,14 +41,22 @@ #include <linux/delay.h> #include <linux/io.h> #include <linux/jiffies.h> +#include <linux/module.h> #include <rdma/ib_verbs.h> #include <rdma/ib_smi.h> +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include <linux/dca.h> +#endif #include "qib.h" #include "qib_7322_regs.h" #include "qib_qsfp.h" #include "qib_mad.h" +#include "qib_verbs.h" + +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME " " fmt static void qib_setup_7322_setextled(struct qib_pportdata *, u32); static void qib_7322_handle_hwerrors(struct qib_devdata *, char *, size_t); @@ -71,6 +80,10 @@ static void qib_7322_mini_pcs_reset(struct qib_pportdata *); static u32 ahb_mod(struct qib_devdata *, int, int, int, u32, u32); static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned); +static void serdes_7322_los_enable(struct qib_pportdata *, int); +static int serdes_7322_init_old(struct qib_pportdata *); +static int serdes_7322_init_new(struct qib_pportdata *); +static void dump_sdma_7322_state(struct qib_pportdata *); #define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb)) @@ -111,6 +124,25 @@ static ushort qib_singleport; module_param_named(singleport, qib_singleport, ushort, S_IRUGO); MODULE_PARM_DESC(singleport, "Use only IB port 1; more per-port buffer space"); +static ushort qib_krcvq01_no_msi; +module_param_named(krcvq01_no_msi, qib_krcvq01_no_msi, ushort, S_IRUGO); +MODULE_PARM_DESC(krcvq01_no_msi, "No MSI for kctx < 2"); + +/* + * Receive header queue sizes + */ +static unsigned qib_rcvhdrcnt; +module_param_named(rcvhdrcnt, qib_rcvhdrcnt, uint, S_IRUGO); +MODULE_PARM_DESC(rcvhdrcnt, "receive header count"); + +static unsigned qib_rcvhdrsize; +module_param_named(rcvhdrsize, qib_rcvhdrsize, uint, S_IRUGO); +MODULE_PARM_DESC(rcvhdrsize, "receive header size in 32-bit words"); + +static unsigned qib_rcvhdrentsize; +module_param_named(rcvhdrentsize, qib_rcvhdrentsize, uint, S_IRUGO); +MODULE_PARM_DESC(rcvhdrentsize, "receive header entry size in 32-bit words"); + #define MAX_ATTEN_LEN 64 /* plenty for any real system */ /* for read back, default index is ~5m copper cable */ static char txselect_list[MAX_ATTEN_LEN] = "10"; @@ -314,7 +346,7 @@ MODULE_PARM_DESC(txselect, \ #define krp_serdesctrl KREG_IBPORT_IDX(IBSerdesCtrl) /* - * Per-context kernel registers. Acess only with qib_read_kreg_ctxt() + * Per-context kernel registers. Access only with qib_read_kreg_ctxt() * or qib_write_kreg_ctxt() */ #define krc_rcvhdraddr KREG_IDX(RcvHdrAddr0) @@ -379,7 +411,6 @@ MODULE_PARM_DESC(txselect, \ #define crp_txdroppedpkt CREG_IDX(TxDroppedPktCnt) #define crp_txhdrerr CREG_IDX(TxHeadersErrCnt) #define crp_txlenerr CREG_IDX(TxLenErrCnt) -#define crp_txlenerr CREG_IDX(TxLenErrCnt) #define crp_txminmaxlenerr CREG_IDX(TxMaxMinLenErrCnt) #define crp_txsdmadesc CREG_IDX(TxSDmaDescCnt) #define crp_txunderrun CREG_IDX(TxUnderrunCnt) @@ -451,6 +482,8 @@ static u8 ib_rate_to_delay[IB_RATE_120_GBPS + 1] = { #define IB_7322_LT_STATE_RECOVERIDLE 0x0f #define IB_7322_LT_STATE_CFGENH 0x10 #define IB_7322_LT_STATE_CFGTEST 0x11 +#define IB_7322_LT_STATE_CFGWAITRMTTEST 0x12 +#define IB_7322_LT_STATE_CFGWAITENH 0x13 /* link state machine states from IBC */ #define IB_7322_L_STATE_DOWN 0x0 @@ -480,14 +513,24 @@ static const u8 qib_7322_physportstate[0x20] = { IB_PHYSPORTSTATE_LINK_ERR_RECOVER, [IB_7322_LT_STATE_CFGENH] = IB_PHYSPORTSTATE_CFG_ENH, [IB_7322_LT_STATE_CFGTEST] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x13] = IB_PHYSPORTSTATE_CFG_WAIT_ENH, + [IB_7322_LT_STATE_CFGWAITRMTTEST] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7322_LT_STATE_CFGWAITENH] = + IB_PHYSPORTSTATE_CFG_WAIT_ENH, [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN }; +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify { + int rcv; + void *arg; + struct irq_affinity_notify notify; +}; +#endif + struct qib_chip_specific { u64 __iomem *cregbase; u64 *cntrs; @@ -515,8 +558,13 @@ struct qib_chip_specific { u32 lastbuf_for_pio; u32 stay_in_freeze; u32 recovery_ports_initted; - struct msix_entry *msix_entries; - void **msix_arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + u32 dca_ctrl; + int rhdr_cpu[18]; + int sdma_cpu[2]; + u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */ +#endif + struct qib_msix_entry *msix_entries; unsigned long *sendchkenable; unsigned long *sendgrhchk; unsigned long *sendibchk; @@ -543,7 +591,8 @@ struct vendor_txdds_ent { static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *); #define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */ -#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */ +#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */ +#define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */ #define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */ #define H1_FORCE_VAL 8 @@ -588,8 +637,8 @@ struct qib_chippport_specific { u64 ibmalfsnap; u64 ibcctrl_a; /* krp_ibcctrl_a shadow */ u64 ibcctrl_b; /* krp_ibcctrl_b shadow */ - u64 qdr_dfe_time; - u64 chase_end; + unsigned long qdr_dfe_time; + unsigned long chase_end; u32 autoneg_tries; u32 recovery_init; u32 qdr_dfe_on; @@ -604,6 +653,7 @@ struct qib_chippport_specific { u8 ibmalfusesnap; struct qib_qsfp_data qsfp_data; char epmsgbuf[192]; /* for port error interrupt msg buffer */ + char sdmamsgbuf[192]; /* for per-port sdma error messages */ }; static struct { @@ -611,27 +661,75 @@ static struct { irq_handler_t handler; int lsb; int port; /* 0 if not port-specific, else port # */ + int dca; } irq_table[] = { - { QIB_DRV_NAME, qib_7322intr, -1, 0 }, - { QIB_DRV_NAME " (buf avail)", qib_7322bufavail, - SYM_LSB(IntStatus, SendBufAvail), 0 }, - { QIB_DRV_NAME " (sdma 0)", sdma_intr, - SYM_LSB(IntStatus, SDmaInt_0), 1 }, - { QIB_DRV_NAME " (sdma 1)", sdma_intr, - SYM_LSB(IntStatus, SDmaInt_1), 2 }, - { QIB_DRV_NAME " (sdmaI 0)", sdma_idle_intr, - SYM_LSB(IntStatus, SDmaIdleInt_0), 1 }, - { QIB_DRV_NAME " (sdmaI 1)", sdma_idle_intr, - SYM_LSB(IntStatus, SDmaIdleInt_1), 2 }, - { QIB_DRV_NAME " (sdmaP 0)", sdma_progress_intr, - SYM_LSB(IntStatus, SDmaProgressInt_0), 1 }, - { QIB_DRV_NAME " (sdmaP 1)", sdma_progress_intr, - SYM_LSB(IntStatus, SDmaProgressInt_1), 2 }, - { QIB_DRV_NAME " (sdmaC 0)", sdma_cleanup_intr, - SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 }, - { QIB_DRV_NAME " (sdmaC 1)", sdma_cleanup_intr, - SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 }, + { "", qib_7322intr, -1, 0, 0 }, + { " (buf avail)", qib_7322bufavail, + SYM_LSB(IntStatus, SendBufAvail), 0, 0}, + { " (sdma 0)", sdma_intr, + SYM_LSB(IntStatus, SDmaInt_0), 1, 1 }, + { " (sdma 1)", sdma_intr, + SYM_LSB(IntStatus, SDmaInt_1), 2, 1 }, + { " (sdmaI 0)", sdma_idle_intr, + SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1}, + { " (sdmaI 1)", sdma_idle_intr, + SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1}, + { " (sdmaP 0)", sdma_progress_intr, + SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 }, + { " (sdmaP 1)", sdma_progress_intr, + SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 }, + { " (sdmaC 0)", sdma_cleanup_intr, + SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 }, + { " (sdmaC 1)", sdma_cleanup_intr, + SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0}, +}; + +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static const struct dca_reg_map { + int shadow_inx; + int lsb; + u64 mask; + u16 regno; +} dca_rcvhdr_reg_map[] = { + { 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) }, }; +#endif /* ibcctrl bits */ #define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 @@ -655,6 +753,13 @@ static void write_7322_init_portregs(struct qib_pportdata *); static void setup_7322_link_recovery(struct qib_pportdata *, u32); static void check_7322_rxe_status(struct qib_pportdata *); static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *); +#ifdef CONFIG_INFINIBAND_QIB_DCA +static void qib_setup_dca(struct qib_devdata *dd); +static void setup_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +static void reset_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +#endif /** * qib_read_ureg32 - read 32-bit virtualized per-context register @@ -1084,9 +1189,9 @@ static inline u32 read_7322_creg32_port(const struct qib_pportdata *ppd, #define AUTONEG_TRIES 3 /* sequential retries to negotiate DDR */ #define HWE_AUTO(fldname) { .mask = SYM_MASK(HwErrMask, fldname##Mask), \ - .msg = #fldname } + .msg = #fldname , .sz = sizeof(#fldname) } #define HWE_AUTO_P(fldname, port) { .mask = SYM_MASK(HwErrMask, \ - fldname##Mask##_##port), .msg = #fldname } + fldname##Mask##_##port), .msg = #fldname , .sz = sizeof(#fldname) } static const struct qib_hwerror_msgs qib_7322_hwerror_msgs[] = { HWE_AUTO_P(IBSerdesPClkNotDetect, 1), HWE_AUTO_P(IBSerdesPClkNotDetect, 0), @@ -1104,14 +1209,16 @@ static const struct qib_hwerror_msgs qib_7322_hwerror_msgs[] = { HWE_AUTO_P(IBCBusFromSPCParityErr, 0), HWE_AUTO(statusValidNoEop), HWE_AUTO(LATriggered), - { .mask = 0 } + { .mask = 0, .sz = 0 } }; #define E_AUTO(fldname) { .mask = SYM_MASK(ErrMask, fldname##Mask), \ - .msg = #fldname } + .msg = #fldname, .sz = sizeof(#fldname) } #define E_P_AUTO(fldname) { .mask = SYM_MASK(ErrMask_0, fldname##Mask), \ - .msg = #fldname } + .msg = #fldname, .sz = sizeof(#fldname) } static const struct qib_hwerror_msgs qib_7322error_msgs[] = { + E_AUTO(RcvEgrFullErr), + E_AUTO(RcvHdrFullErr), E_AUTO(ResetNegated), E_AUTO(HardwareErr), E_AUTO(InvalidAddrErr), @@ -1124,9 +1231,7 @@ static const struct qib_hwerror_msgs qib_7322error_msgs[] = { E_AUTO(SendSpecialTriggerErr), E_AUTO(SDmaWrongPortErr), E_AUTO(SDmaBufMaskDuplicateErr), - E_AUTO(RcvHdrFullErr), - E_AUTO(RcvEgrFullErr), - { .mask = 0 } + { .mask = 0, .sz = 0 } }; static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { @@ -1136,7 +1241,8 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { /* * SDmaHaltErr is not really an error, make it clearer; */ - {.mask = SYM_MASK(ErrMask_0, SDmaHaltErrMask), .msg = "SDmaHalted"}, + {.mask = SYM_MASK(ErrMask_0, SDmaHaltErrMask), .msg = "SDmaHalted", + .sz = 11}, E_P_AUTO(SDmaDescAddrMisalignErr), E_P_AUTO(SDmaUnexpDataErr), E_P_AUTO(SDmaMissingDwErr), @@ -1172,7 +1278,7 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { E_P_AUTO(RcvICRCErr), E_P_AUTO(RcvVCRCErr), E_P_AUTO(RcvFormatErr), - { .mask = 0 } + { .mask = 0, .sz = 0 } }; /* @@ -1180,17 +1286,17 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { * context */ #define INTR_AUTO(fldname) { .mask = SYM_MASK(IntMask, fldname##Mask), \ - .msg = #fldname } + .msg = #fldname, .sz = sizeof(#fldname) } /* Below generates "auto-message" for interrupts specific to a port */ #define INTR_AUTO_P(fldname) { .mask = MASK_ACROSS(\ SYM_LSB(IntMask, fldname##Mask##_0), \ SYM_LSB(IntMask, fldname##Mask##_1)), \ - .msg = #fldname "_P" } + .msg = #fldname "_P", .sz = sizeof(#fldname "_P") } /* For some reason, the SerDesTrimDone bits are reversed */ #define INTR_AUTO_PI(fldname) { .mask = MASK_ACROSS(\ SYM_LSB(IntMask, fldname##Mask##_1), \ SYM_LSB(IntMask, fldname##Mask##_0)), \ - .msg = #fldname "_P" } + .msg = #fldname "_P", .sz = sizeof(#fldname "_P") } /* * Below generates "auto-message" for interrupts specific to a context, * with ctxt-number appended @@ -1198,7 +1304,7 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { #define INTR_AUTO_C(fldname) { .mask = MASK_ACROSS(\ SYM_LSB(IntMask, fldname##0IntMask), \ SYM_LSB(IntMask, fldname##17IntMask)), \ - .msg = #fldname "_C"} + .msg = #fldname "_C", .sz = sizeof(#fldname "_C") } static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = { INTR_AUTO_P(SDmaInt), @@ -1212,11 +1318,12 @@ static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = { INTR_AUTO_P(SendDoneInt), INTR_AUTO(SendBufAvailInt), INTR_AUTO_C(RcvAvail), - { .mask = 0 } + { .mask = 0, .sz = 0 } }; #define TXSYMPTOM_AUTO_P(fldname) \ - { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), .msg = #fldname } + { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \ + .msg = #fldname, .sz = sizeof(#fldname) } static const struct qib_hwerror_msgs hdrchk_msgs[] = { TXSYMPTOM_AUTO_P(NonKeyPacket), TXSYMPTOM_AUTO_P(GRHFail), @@ -1225,7 +1332,7 @@ static const struct qib_hwerror_msgs hdrchk_msgs[] = { TXSYMPTOM_AUTO_P(SLIDFail), TXSYMPTOM_AUTO_P(RawIPV6), TXSYMPTOM_AUTO_P(PacketTooSmall), - { .mask = 0 } + { .mask = 0, .sz = 0 } }; #define IBA7322_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */ @@ -1270,7 +1377,7 @@ static void err_decode(char *msg, size_t len, u64 errs, u64 these, lmask; int took, multi, n = 0; - while (msp && msp->mask) { + while (errs && msp && msp->mask) { multi = (msp->mask & (msp->mask - 1)); while (errs & msp->mask) { these = (errs & msp->mask); @@ -1281,9 +1388,14 @@ static void err_decode(char *msg, size_t len, u64 errs, *msg++ = ','; len--; } - took = scnprintf(msg, len, "%s", msp->msg); + BUG_ON(!msp->sz); + /* msp->sz counts the nul */ + took = min_t(size_t, msp->sz - (size_t)1, len); + memcpy(msg, msp->msg, took); len -= took; msg += took; + if (len) + *msg = '\0'; } errs &= ~lmask; if (len && multi) { @@ -1484,6 +1596,8 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs) struct qib_devdata *dd = ppd->dd; errs &= QIB_E_P_SDMAERRS; + err_decode(ppd->cpspec->sdmamsgbuf, sizeof(ppd->cpspec->sdmamsgbuf), + errs, qib_7322p_error_msgs); if (errs & QIB_E_P_SDMAUNEXPDATA) qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", dd->unit, @@ -1491,6 +1605,15 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs) spin_lock_irqsave(&ppd->sdma_lock, flags); + if (errs != QIB_E_P_SDMAHALT) { + /* SDMA errors have QIB_E_P_SDMAHALT and another bit set */ + qib_dev_porterr(dd, ppd->port, + "SDMA %s 0x%016llx %s\n", + qib_sdma_state_names[ppd->sdma_state.current_state], + errs, ppd->cpspec->sdmamsgbuf); + dump_sdma_7322_state(ppd); + } + switch (ppd->sdma_state.current_state) { case qib_sdma_state_s00_hw_down: break; @@ -1542,8 +1665,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd) qib_stats.sps_errints++; errs = qib_read_kreg64(dd, kr_errstatus); if (!errs) { - qib_devinfo(dd->pcidev, "device error interrupt, " - "but no error bits set!\n"); + qib_devinfo(dd->pcidev, + "device error interrupt, but no error bits set!\n"); goto done; } @@ -1589,8 +1712,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd) if (errs & QIB_E_RESET) { int pidx; - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1621,6 +1744,14 @@ done: return; } +static void qib_error_tasklet(unsigned long data) +{ + struct qib_devdata *dd = (struct qib_devdata *)data; + + handle_7322_errors(dd); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); +} + static void reenable_chase(unsigned long opaque) { struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; @@ -1630,7 +1761,8 @@ static void reenable_chase(unsigned long opaque) QLOGIC_IB_IBCC_LINKINITCMD_POLL); } -static void disable_chase(struct qib_pportdata *ppd, u64 tnow, u8 ibclt) +static void disable_chase(struct qib_pportdata *ppd, unsigned long tnow, + u8 ibclt) { ppd->cpspec->chase_end = 0; @@ -1646,7 +1778,7 @@ static void disable_chase(struct qib_pportdata *ppd, u64 tnow, u8 ibclt) static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) { u8 ibclt; - u64 tnow; + unsigned long tnow; ibclt = (u8)SYM_FIELD(ibcst, IBCStatusA_0, LinkTrainingState); @@ -1661,9 +1793,9 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) case IB_7322_LT_STATE_CFGWAITRMT: case IB_7322_LT_STATE_TXREVLANES: case IB_7322_LT_STATE_CFGENH: - tnow = get_jiffies_64(); + tnow = jiffies; if (ppd->cpspec->chase_end && - time_after64(tnow, ppd->cpspec->chase_end)) + time_after(tnow, ppd->cpspec->chase_end)) disable_chase(ppd, tnow, ibclt); else if (!ppd->cpspec->chase_end) ppd->cpspec->chase_end = tnow + QIB_CHASE_TIME; @@ -1673,10 +1805,14 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) break; } - if (ibclt == IB_7322_LT_STATE_CFGTEST && + if (((ibclt >= IB_7322_LT_STATE_CFGTEST && + ibclt <= IB_7322_LT_STATE_CFGWAITENH) || + ibclt == IB_7322_LT_STATE_LINKUP) && (ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR))) { force_h1(ppd); ppd->cpspec->qdr_reforce = 1; + if (!ppd->dd->cspec->r1) + serdes_7322_los_enable(ppd, 0); } else if (ppd->cpspec->qdr_reforce && (ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR)) && (ibclt == IB_7322_LT_STATE_CFGENH || @@ -1692,18 +1828,37 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) ibclt <= IB_7322_LT_STATE_SLEEPQUIET))) adj_tx_serdes(ppd); - if (!ppd->cpspec->qdr_dfe_on && ibclt != IB_7322_LT_STATE_LINKUP && - ibclt <= IB_7322_LT_STATE_SLEEPQUIET) { - ppd->cpspec->qdr_dfe_on = 1; - ppd->cpspec->qdr_dfe_time = 0; - /* On link down, reenable QDR adaptation */ - qib_write_kreg_port(ppd, krp_static_adapt_dis(2), - ppd->dd->cspec->r1 ? - QDR_STATIC_ADAPT_DOWN_R1 : - QDR_STATIC_ADAPT_DOWN); + if (ibclt != IB_7322_LT_STATE_LINKUP) { + u8 ltstate = qib_7322_phys_portstate(ibcst); + u8 pibclt = (u8)SYM_FIELD(ppd->lastibcstat, IBCStatusA_0, + LinkTrainingState); + if (!ppd->dd->cspec->r1 && + pibclt == IB_7322_LT_STATE_LINKUP && + ltstate != IB_PHYSPORTSTATE_LINK_ERR_RECOVER && + ltstate != IB_PHYSPORTSTATE_RECOVERY_RETRAIN && + ltstate != IB_PHYSPORTSTATE_RECOVERY_WAITRMT && + ltstate != IB_PHYSPORTSTATE_RECOVERY_IDLE) + /* If the link went down (but no into recovery, + * turn LOS back on */ + serdes_7322_los_enable(ppd, 1); + if (!ppd->cpspec->qdr_dfe_on && + ibclt <= IB_7322_LT_STATE_SLEEPQUIET) { + ppd->cpspec->qdr_dfe_on = 1; + ppd->cpspec->qdr_dfe_time = 0; + /* On link down, reenable QDR adaptation */ + qib_write_kreg_port(ppd, krp_static_adapt_dis(2), + ppd->dd->cspec->r1 ? + QDR_STATIC_ADAPT_DOWN_R1 : + QDR_STATIC_ADAPT_DOWN); + pr_info( + "IB%u:%u re-enabled QDR adaptation ibclt %x\n", + ppd->dd->unit, ppd->port, ibclt); + } } } +static int qib_7322_set_ib_cfg(struct qib_pportdata *, int, u32); + /* * This is per-pport error handling. * will likely get it's own MSIx interrupt (one for each port, @@ -1739,9 +1894,9 @@ static noinline void handle_7322_p_errors(struct qib_pportdata *ppd) if (!*msg) snprintf(msg, sizeof ppd->cpspec->epmsgbuf, "no others"); - qib_dev_porterr(dd, ppd->port, "error interrupt with unknown" - " errors 0x%016Lx set (and %s)\n", - (errs & ~QIB_E_P_BITSEXTANT), msg); + qib_dev_porterr(dd, ppd->port, + "error interrupt with unknown errors 0x%016Lx set (and %s)\n", + (errs & ~QIB_E_P_BITSEXTANT), msg); *msg = '\0'; } @@ -1959,8 +2114,8 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) goto bail; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); goto bail; } qib_stats.sps_hwerrs++; @@ -1974,8 +2129,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, /* no EEPROM logging, yet */ if (hwerrs) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); ctrl = qib_read_kreg32(dd, kr_control); if ((ctrl & SYM_MASK(Control, FreezeMode)) && !dd->diag_client) { @@ -1999,8 +2155,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcpy(msg, "[Memory BIST test failed, " - "InfiniPath hardware unusable]", msgl); + strlcpy(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); @@ -2012,9 +2169,33 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_dev_err(dd, "%s hardware error\n", msg); + if (hwerrs & + (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) | + SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) { + int pidx = 0; + int err; + unsigned long flags; + struct qib_pportdata *ppd = dd->pport; + for (; pidx < dd->num_pports; ++pidx, ppd++) { + err = 0; + if (pidx == 0 && (hwerrs & + SYM_MASK(HwErrMask, SDmaMemReadErrMask_0))) + err++; + if (pidx == 1 && (hwerrs & + SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) + err++; + if (err) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + dump_sdma_7322_state(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + } + } + if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * for /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. @@ -2214,6 +2395,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); qib_write_kreg(dd, kr_scratch, 0ULL); + /* ensure previous Tx parameters are not still forced */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + if (qib_compat_ddr_negotiate) { ppd->cpspec->ibdeltainprog = 1; ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd, @@ -2242,16 +2428,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) SYM_LSB(IBCCtrlA_0, MaxPktLen); ppd->cpspec->ibcctrl_a = ibc; /* without linkcmd or linkinitcmd! */ - /* initially come up waiting for TS1, without sending anything. */ - val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << - QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); - /* * Reset the PCS interface to the serdes (and also ibc, which is still * in reset from above). Writes new value of ibcctrl_a as last step. */ qib_7322_mini_pcs_reset(ppd); - qib_write_kreg(dd, kr_scratch, 0ULL); if (!ppd->cpspec->ibcctrl_b) { unsigned lse = ppd->link_speed_enabled; @@ -2317,6 +2498,14 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) ppd->cpspec->ibcctrl_a |= SYM_MASK(IBCCtrlA_0, IBLinkEn); set_vls(ppd); + /* initially come up DISABLED, without sending anything. */ + val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << + QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + qib_write_kreg_port(ppd, krp_ibcctrl_a, val); + qib_write_kreg(dd, kr_scratch, 0ULL); + /* clear the linkinit cmds */ + ppd->cpspec->ibcctrl_a = val & ~SYM_MASK(IBCCtrlA_0, LinkInitCmd); + /* be paranoid against later code motion, etc. */ spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvIBPortEnable); @@ -2348,10 +2537,9 @@ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd) ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; spin_unlock_irqrestore(&ppd->lflags_lock, flags); wake_up(&ppd->cpspec->autoneg_wait); - cancel_delayed_work(&ppd->cpspec->autoneg_work); + cancel_delayed_work_sync(&ppd->cpspec->autoneg_work); if (ppd->dd->cspec->r1) - cancel_delayed_work(&ppd->cpspec->ipg_work); - flush_scheduled_work(); + cancel_delayed_work_sync(&ppd->cpspec->ipg_work); ppd->cpspec->chase_end = 0; if (ppd->cpspec->chase_timer.data) /* if initted */ @@ -2483,6 +2671,162 @@ static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on) qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink); } +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + switch (event) { + case DCA_PROVIDER_ADD: + if (dd->flags & QIB_DCA_ENABLED) + break; + if (!dca_add_requester(&dd->pcidev->dev)) { + qib_devinfo(dd->pcidev, "DCA enabled\n"); + dd->flags |= QIB_DCA_ENABLED; + qib_setup_dca(dd); + } + break; + case DCA_PROVIDER_REMOVE: + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), + dd->cspec->dca_ctrl); + } + break; + } + return 0; +} + +static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu) +{ + struct qib_devdata *dd = rcd->dd; + struct qib_chip_specific *cspec = dd->cspec; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->rhdr_cpu[rcd->ctxt] != cpu) { + const struct dca_reg_map *rmp; + + cspec->rhdr_cpu[rcd->ctxt] = cpu; + rmp = &dca_rcvhdr_reg_map[rcd->ctxt]; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb; + qib_devinfo(dd->pcidev, + "Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu, + (long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + qib_write_kreg(dd, rmp->regno, + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_chip_specific *cspec = dd->cspec; + unsigned pidx = ppd->port - 1; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->sdma_cpu[pidx] != cpu) { + cspec->sdma_cpu[pidx] = cpu; + cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ? + SYM_MASK(DCACtrlF, SendDma1DCAOPH) : + SYM_MASK(DCACtrlF, SendDma0DCAOPH)); + cspec->dca_rcvhdr_ctrl[4] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << + (ppd->hw_pidx ? + SYM_LSB(DCACtrlF, SendDma1DCAOPH) : + SYM_LSB(DCACtrlF, SendDma0DCAOPH)); + qib_devinfo(dd->pcidev, + "sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu, + (long long) cspec->dca_rcvhdr_ctrl[4]); + qib_write_kreg(dd, KREG_IDX(DCACtrlF), + cspec->dca_rcvhdr_ctrl[4]); + cspec->dca_ctrl |= ppd->hw_pidx ? + SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) : + SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_setup_dca(struct qib_devdata *dd) +{ + struct qib_chip_specific *cspec = dd->cspec; + int i; + + for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++) + cspec->rhdr_cpu[i] = -1; + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + cspec->sdma_cpu[i] = -1; + cspec->dca_rcvhdr_ctrl[0] = + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[1] = + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[2] = + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[3] = + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[4] = + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt)); + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i, + cspec->dca_rcvhdr_ctrl[i]); + for (i = 0; i < cspec->num_msix_entries; i++) + setup_dca_notifier(dd, &cspec->msix_entries[i]); +} + +static void qib_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct qib_irq_notify *n = + container_of(notify, struct qib_irq_notify, notify); + int cpu = cpumask_first(mask); + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + qib_update_rhdrq_dca(rcd, cpu); + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + qib_update_sdma_dca(ppd, cpu); + } +} + +static void qib_irq_notifier_release(struct kref *ref) +{ + struct qib_irq_notify *n = + container_of(ref, struct qib_irq_notify, notify.kref); + struct qib_devdata *dd; + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + dd = rcd->dd; + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + dd = ppd->dd; + } + qib_devinfo(dd->pcidev, + "release on HCA notify 0x%p n 0x%p\n", ref, n); + kfree(n); +} +#endif + /* * Disable MSIx interrupt if enabled, call generic MSIx code * to cleanup, and clear pending MSIx interrupts. @@ -2499,9 +2843,16 @@ static void qib_7322_nomsix(struct qib_devdata *dd) int i; dd->cspec->num_msix_entries = 0; - for (i = 0; i < n; i++) - free_irq(dd->cspec->msix_entries[i].vector, - dd->cspec->msix_arg[i]); + for (i = 0; i < n; i++) { +#ifdef CONFIG_INFINIBAND_QIB_DCA + reset_dca_notifier(dd, &dd->cspec->msix_entries[i]); +#endif + irq_set_affinity_hint( + dd->cspec->msix_entries[i].msix.vector, NULL); + free_cpumask_var(dd->cspec->msix_entries[i].mask); + free_irq(dd->cspec->msix_entries[i].msix.vector, + dd->cspec->msix_entries[i].arg); + } qib_nomsix(dd); } /* make sure no MSIx interrupts are left pending */ @@ -2523,13 +2874,21 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd) { int i; +#ifdef CONFIG_INFINIBAND_QIB_DCA + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl); + } +#endif + qib_7322_free_irq(dd); kfree(dd->cspec->cntrs); kfree(dd->cspec->sendchkenable); kfree(dd->cspec->sendgrhchk); kfree(dd->cspec->sendibchk); kfree(dd->cspec->msix_entries); - kfree(dd->cspec->msix_arg); for (i = 0; i < dd->num_pports; i++) { unsigned long flags; u32 mask = QSFP_GPIO_MOD_PRS_N | @@ -2597,8 +2956,9 @@ static noinline void unknown_7322_ibits(struct qib_devdata *dd, u64 istat) char msg[128]; kills = istat & ~QIB_I_BITSEXTANT; - qib_dev_err(dd, "Clearing reserved interrupt(s) 0x%016llx:" - " %s\n", (unsigned long long) kills, msg); + qib_dev_err(dd, + "Clearing reserved interrupt(s) 0x%016llx: %s\n", + (unsigned long long) kills, msg); qib_write_kreg(dd, kr_intmask, (dd->cspec->int_enable_mask & ~kills)); } @@ -2647,8 +3007,8 @@ static noinline void unknown_7322_gpio_intr(struct qib_devdata *dd) pins >>= SYM_LSB(EXTStatus, GPIOIn); if (!(pins & mask)) { ++handled; - qd->t_insert = get_jiffies_64(); - schedule_work(&qd->work); + qd->t_insert = jiffies; + queue_work(ib_wq, &qd->work); } } } @@ -2675,8 +3035,10 @@ static noinline void unlikely_7322_intr(struct qib_devdata *dd, u64 istat) unknown_7322_ibits(dd, istat); if (istat & QIB_I_GPIO) unknown_7322_gpio_intr(dd); - if (istat & QIB_I_C_ERROR) - handle_7322_errors(dd); + if (istat & QIB_I_C_ERROR) { + qib_write_kreg(dd, kr_errmask, 0ULL); + tasklet_schedule(&dd->error_tasklet); + } if (istat & INT_MASK_P(Err, 0) && dd->rcd[0]) handle_7322_p_errors(dd->rcd[0]->ppd); if (istat & INT_MASK_P(Err, 1) && dd->rcd[1]) @@ -2753,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* handle "errors" of various kinds first, device ahead of port */ if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO | @@ -2783,10 +3143,8 @@ static irqreturn_t qib_7322intr(int irq, void *data) for (i = 0; i < dd->first_user_ctxt; i++) { if (ctxtrbits & rmask) { ctxtrbits &= ~rmask; - if (dd->rcd[i]) { + if (dd->rcd[i]) qib_kreceive(dd->rcd[i], NULL, &npkts); - adjust_rcv_timeout(dd->rcd[i], npkts); - } } rmask <<= 1; } @@ -2826,16 +3184,13 @@ static irqreturn_t qib_7322pintr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) | (1ULL << QIB_I_RCVURG_LSB)) << rcd->ctxt); qib_kreceive(rcd, NULL, &npkts); - adjust_rcv_timeout(rcd, npkts); return IRQ_HANDLED; } @@ -2856,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL); @@ -2889,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -2918,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -2947,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -2977,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -2990,6 +3335,53 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) return IRQ_HANDLED; } +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + if (!m->dca) + return; + qib_devinfo(dd->pcidev, + "Disabling notifier on HCA %d irq %d\n", + dd->unit, + m->msix.vector); + irq_set_affinity_notifier( + m->msix.vector, + NULL); + m->notifier = NULL; +} + +static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + struct qib_irq_notify *n; + + if (!m->dca) + return; + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (n) { + int ret; + + m->notifier = n; + n->notify.irq = m->msix.vector; + n->notify.notify = qib_irq_notifier_notify; + n->notify.release = qib_irq_notifier_release; + n->arg = m->arg; + n->rcv = m->rcv; + qib_devinfo(dd->pcidev, + "set notifier irq %d rcv %d notify %p\n", + n->notify.irq, n->rcv, &n->notify); + ret = irq_set_affinity_notifier( + n->notify.irq, + &n->notify); + if (ret) { + m->notifier = NULL; + kfree(n); + } + } +} + +#endif + /* * Set up our chip-specific interrupt handler. * The interrupt type has already been setup, so @@ -3003,6 +3395,8 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) int ret, i, msixnum; u64 redirect[6]; u64 mask; + const struct cpumask *local_mask; + int firstcpu, secondcpu = 0, currrcvcpu = 0; if (!dd->num_pports) return; @@ -3030,16 +3424,16 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) /* Try to get INTx interrupt */ try_intx: if (!dd->pcidev->irq) { - qib_dev_err(dd, "irq is 0, BIOS error? " - "Interrupts won't work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); goto bail; } ret = request_irq(dd->pcidev->irq, qib_7322intr, IRQF_SHARED, QIB_DRV_NAME, dd); if (ret) { - qib_dev_err(dd, "Couldn't setup INTx " - "interrupt (irq=%d): %d\n", - dd->pcidev->irq, ret); + qib_dev_err(dd, + "Couldn't setup INTx interrupt (irq=%d): %d\n", + dd->pcidev->irq, ret); goto bail; } dd->cspec->irq = dd->pcidev->irq; @@ -3051,13 +3445,31 @@ try_intx: memset(redirect, 0, sizeof redirect); mask = ~0ULL; msixnum = 0; + local_mask = cpumask_of_pcibus(dd->pcidev->bus); + firstcpu = cpumask_first(local_mask); + if (firstcpu >= nr_cpu_ids || + cpumask_weight(local_mask) == num_online_cpus()) { + local_mask = topology_core_cpumask(0); + firstcpu = cpumask_first(local_mask); + } + if (firstcpu < nr_cpu_ids) { + secondcpu = cpumask_next(firstcpu, local_mask); + if (secondcpu >= nr_cpu_ids) + secondcpu = firstcpu; + currrcvcpu = secondcpu; + } for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) { irq_handler_t handler; - const char *name; void *arg; u64 val; int lsb, reg, sh; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca = 0; +#endif + dd->cspec->msix_entries[msixnum]. + name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1] + = '\0'; if (i < ARRAY_SIZE(irq_table)) { if (irq_table[i].port) { /* skip if for a non-configured port */ @@ -3066,9 +3478,16 @@ try_intx: arg = dd->pport + irq_table[i].port - 1; } else arg = dd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = irq_table[i].dca; +#endif lsb = irq_table[i].lsb; handler = irq_table[i].handler; - name = irq_table[i].name; + snprintf(dd->cspec->msix_entries[msixnum].name, + sizeof(dd->cspec->msix_entries[msixnum].name) + - 1, + QIB_DRV_NAME "%d%s", dd->unit, + irq_table[i].name); } else { unsigned ctxt; @@ -3077,25 +3496,41 @@ try_intx: arg = dd->rcd[ctxt]; if (!arg) continue; + if (qib_krcvq01_no_msi && ctxt < 2) + continue; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = 1; +#endif lsb = QIB_I_RCVAVAIL_LSB + ctxt; handler = qib_7322pintr; - name = QIB_DRV_NAME " (kctx)"; + snprintf(dd->cspec->msix_entries[msixnum].name, + sizeof(dd->cspec->msix_entries[msixnum].name) + - 1, + QIB_DRV_NAME "%d (kctx)", dd->unit); } - ret = request_irq(dd->cspec->msix_entries[msixnum].vector, - handler, 0, name, arg); + ret = request_irq( + dd->cspec->msix_entries[msixnum].msix.vector, + handler, 0, dd->cspec->msix_entries[msixnum].name, + arg); if (ret) { /* * Shouldn't happen since the enable said we could * have as many as we are trying to setup here. */ - qib_dev_err(dd, "Couldn't setup MSIx " - "interrupt (vec=%d, irq=%d): %d\n", msixnum, - dd->cspec->msix_entries[msixnum].vector, - ret); + qib_dev_err(dd, + "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n", + msixnum, + dd->cspec->msix_entries[msixnum].msix.vector, + ret); qib_7322_nomsix(dd); goto try_intx; } - dd->cspec->msix_arg[msixnum] = arg; + dd->cspec->msix_entries[msixnum].arg = arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->cspec->msix_entries[msixnum].dca = dca; + dd->cspec->msix_entries[msixnum].rcv = + handler == qib_7322pintr; +#endif if (lsb >= 0) { reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * @@ -3105,12 +3540,33 @@ try_intx: } val = qib_read_kreg64(dd, 2 * msixnum + 1 + (QIB_7322_MsixTable_OFFS / sizeof(u64))); + if (firstcpu < nr_cpu_ids && + zalloc_cpumask_var( + &dd->cspec->msix_entries[msixnum].mask, + GFP_KERNEL)) { + if (handler == qib_7322pintr) { + cpumask_set_cpu(currrcvcpu, + dd->cspec->msix_entries[msixnum].mask); + currrcvcpu = cpumask_next(currrcvcpu, + local_mask); + if (currrcvcpu >= nr_cpu_ids) + currrcvcpu = secondcpu; + } else { + cpumask_set_cpu(firstcpu, + dd->cspec->msix_entries[msixnum].mask); + } + irq_set_affinity_hint( + dd->cspec->msix_entries[msixnum].msix.vector, + dd->cspec->msix_entries[msixnum].mask); + } msixnum++; } /* Initialize the vector mapping */ for (i = 0; i < ARRAY_SIZE(redirect); i++) qib_write_kreg(dd, kr_intredirect + i, redirect[i]); dd->cspec->main_int_mask = mask; + tasklet_init(&dd->error_tasklet, qib_error_tasklet, + (unsigned long)dd); bail:; } @@ -3157,6 +3613,10 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd) case BOARD_QME7342: n = "InfiniPath_QME7342"; break; + case 8: + n = "InfiniPath_QME7362"; + dd->flags |= QIB_HAS_QSFP; + break; case 15: n = "InfiniPath_QLE7342_TEST"; dd->flags |= QIB_HAS_QSFP; @@ -3183,8 +3643,9 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd) (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) { - qib_devinfo(dd->pcidev, "IB%u: Forced to single port mode" - " by module parameter\n", dd->unit); + qib_devinfo(dd->pcidev, + "IB%u: Forced to single port mode by module parameter\n", + dd->unit); features &= PORT_SPD_CAP; } @@ -3248,12 +3709,13 @@ static int qib_do_7322_reset(struct qib_devdata *dd) dd->pport->cpspec->ibsymdelta = 0; dd->pport->cpspec->iblnkerrdelta = 0; dd->pport->cpspec->ibmalfdelta = 0; - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); /* * Keep chip from being accessed until we are ready. Use * writeq() directly, to allow the write even though QIB_PRESENT - * isnt' set. + * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT | QIB_BADINTR); dd->flags |= QIB_DOING_RESET; @@ -3278,8 +3740,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (val == dd->revision) break; if (i == 5) { - qib_dev_err(dd, "Failed to initialize after reset, " - "unusable\n"); + qib_dev_err(dd, + "Failed to initialize after reset, unusable\n"); ret = 0; goto bail; } @@ -3290,7 +3752,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (msix_entries) { /* restore the MSIx vector address and data if saved above */ for (i = 0; i < msix_entries; i++) { - dd->cspec->msix_entries[i].entry = i; + dd->cspec->msix_entries[i].msix.entry = i; if (!msix_vecsave || !msix_vecsave[2 * i]) continue; qib_write_kreg(dd, 2 * i + @@ -3310,8 +3772,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (qib_pcie_params(dd, dd->lbus_width, &dd->cspec->num_msix_entries, dd->cspec->msix_entries)) - qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); qib_setup_7322_interrupt(dd, 1); @@ -3352,8 +3814,9 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, return; } if (chippa >= (1UL << IBA7322_TID_SZ_SHIFT)) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -3475,11 +3938,6 @@ static void qib_7322_config_ctxts(struct qib_devdata *dd) nchipctxts = qib_read_kreg32(dd, kr_contextcnt); dd->cspec->numctxts = nchipctxts; if (qib_n_krcv_queues > 1 && dd->num_pports) { - /* - * Set the mask for which bits from the QPN are used - * to select a context number. - */ - dd->qpn_mask = 0x3f; dd->first_user_ctxt = NUM_IB_PORTS + (qib_n_krcv_queues - 1) * dd->num_pports; if (dd->first_user_ctxt > nchipctxts) @@ -3530,8 +3988,11 @@ static void qib_7322_config_ctxts(struct qib_devdata *dd) /* kr_rcvegrcnt changes based on the number of contexts enabled */ dd->cspec->rcvegrcnt = qib_read_kreg32(dd, kr_rcvegrcnt); - dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, - dd->num_pports > 1 ? 1024U : 2048U); + if (qib_rcvhdrcnt) + dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, qib_rcvhdrcnt); + else + dd->rcvhdrcnt = 2 * max(dd->cspec->rcvegrcnt, + dd->num_pports > 1 ? 1024U : 2048U); } static int qib_7322_get_ib_cfg(struct qib_pportdata *ppd, int which) @@ -3683,7 +4144,7 @@ static int qib_7322_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) /* * As with width, only write the actual register if the * link is currently down, otherwise takes effect on next - * link change. Since setting is being explictly requested + * link change. Since setting is being explicitly requested * (via MAD or sysfs), clear autoneg failure status if speed * autoneg is enabled. */ @@ -3909,8 +4370,9 @@ static int qib_7322_set_loopback(struct qib_pportdata *ppd, const char *what) Loopback); /* enable heart beat again */ val = IBA7322_IBC_HRTBT_RMASK << IBA7322_IBC_HRTBT_LSB; - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -4002,12 +4464,20 @@ static int qib_7322_set_ib_table(struct qib_pportdata *ppd, int which, void *t) } static void qib_update_7322_usrhead(struct qib_ctxtdata *rcd, u64 hd, - u32 updegr, u32 egrhd) + u32 updegr, u32 egrhd, u32 npkts) { - qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); - qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + /* + * Need to write timeout register before updating rcvhdrhead to ensure + * that the timer is enabled on reception of a packet. + */ + if (hd >> IBA7322_HDRHEAD_PKTINT_SHIFT) + adjust_rcv_timeout(rcd, npkts); if (updegr) qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); + mmiowb(); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + mmiowb(); } static u32 qib_7322_hdrqempty(struct qib_ctxtdata *rcd) @@ -4113,7 +4583,7 @@ static void rcvctrl_7322_mod(struct qib_pportdata *ppd, unsigned int op, * Init the context registers also; if we were * disabled, tail and head should both be zero * already from the enable, but since we don't - * know, we have to do it explictly. + * know, we have to do it explicitly. */ val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt); qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt); @@ -4586,8 +5056,8 @@ static void init_7322_cntrnames(struct qib_devdata *dd) dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs * sizeof(u64), GFP_KERNEL); if (!dd->pport[i].cpspec->portcntrs) - qib_dev_err(dd, "Failed allocation for" - " portcounters\n"); + qib_dev_err(dd, + "Failed allocation for portcounters\n"); } } @@ -4716,7 +5186,7 @@ static void qib_get_7322_faststats(unsigned long opaque) (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | QIBL_LINKACTIVE)) && ppd->cpspec->qdr_dfe_time && - time_after64(get_jiffies_64(), ppd->cpspec->qdr_dfe_time)) { + time_is_before_jiffies(ppd->cpspec->qdr_dfe_time)) { ppd->cpspec->qdr_dfe_on = 0; qib_write_kreg_port(ppd, krp_static_adapt_dis(2), @@ -4737,8 +5207,8 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd) if (!dd->cspec->num_msix_entries) return 0; /* already using INTx */ - qib_devinfo(dd->pcidev, "MSIx interrupt not detected," - " trying INTx interrupts\n"); + qib_devinfo(dd->pcidev, + "MSIx interrupt not detected, trying INTx interrupts\n"); qib_7322_nomsix(dd); qib_enable_intx(dd->pcidev); qib_setup_7322_interrupt(dd, 0); @@ -4926,8 +5396,8 @@ static void try_7322_autoneg(struct qib_pportdata *ppd) set_7322_ibspeed_fast(ppd, QIB_IB_DDR); qib_7322_mini_pcs_reset(ppd); /* 2 msec is minimum length of a poll cycle */ - schedule_delayed_work(&ppd->cpspec->autoneg_work, - msecs_to_jiffies(2)); + queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work, + msecs_to_jiffies(2)); } /* @@ -5023,15 +5493,11 @@ static void try_7322_ipg(struct qib_pportdata *ppd) goto retry; if (!ibp->smi_ah) { - struct ib_ah_attr attr; struct ib_ah *ah; - memset(&attr, 0, sizeof attr); - attr.dlid = be16_to_cpu(IB_LID_PERMISSIVE); - attr.port_num = ppd->port; - ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr); + ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE)); if (IS_ERR(ah)) - ret = -EINVAL; + ret = PTR_ERR(ah); else { send_buf->ah = ah; ibp->smi_ah = to_iah(ah); @@ -5057,7 +5523,8 @@ static void try_7322_ipg(struct qib_pportdata *ppd) ib_free_send_mad(send_buf); retry: delay = 2 << ppd->cpspec->ipg_tries; - schedule_delayed_work(&ppd->cpspec->ipg_work, msecs_to_jiffies(delay)); + queue_delayed_work(ib_wq, &ppd->cpspec->ipg_work, + msecs_to_jiffies(delay)); } /* @@ -5149,6 +5616,8 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) QIBL_IB_AUTONEG_INPROG))) set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled); if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + struct qib_qsfp_data *qd = + &ppd->cpspec->qsfp_data; /* unlock the Tx settings, speed may change */ qib_write_kreg_port(ppd, krp_tx_deemph_override, SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, @@ -5156,6 +5625,12 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) qib_cancel_sends(ppd); /* on link down, ensure sane pcs state */ qib_7322_mini_pcs_reset(ppd); + /* schedule the qsfp refresh which should turn the link + off */ + if (ppd->dd->flags & QIB_HAS_QSFP) { + qd->t_insert = jiffies; + queue_work(ib_wq, &qd->work); + } spin_lock_irqsave(&ppd->sdma_lock, flags); if (__qib_sdma_running(ppd)) __qib_sdma_process_event(ppd, @@ -5505,37 +5980,79 @@ static void qsfp_7322_event(struct work_struct *work) { struct qib_qsfp_data *qd; struct qib_pportdata *ppd; - u64 pwrup; + unsigned long pwrup; + unsigned long flags; int ret; u32 le2; qd = container_of(work, struct qib_qsfp_data, work); ppd = qd->ppd; - pwrup = qd->t_insert + msecs_to_jiffies(QSFP_PWR_LAG_MSEC); + pwrup = qd->t_insert + + msecs_to_jiffies(QSFP_PWR_LAG_MSEC - QSFP_MODPRS_LAG_MSEC); - /* - * Some QSFP's not only do not respond until the full power-up - * time, but may behave badly if we try. So hold off responding - * to insertion. - */ - while (1) { - u64 now = get_jiffies_64(); - if (time_after64(now, pwrup)) - break; - msleep(1); + /* Delay for 20 msecs to allow ModPrs resistor to setup */ + mdelay(QSFP_MODPRS_LAG_MSEC); + + if (!qib_qsfp_mod_present(ppd)) { + ppd->cpspec->qsfp_data.modpresent = 0; + /* Set the physical link to disabled */ + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else { + /* + * Some QSFP's not only do not respond until the full power-up + * time, but may behave badly if we try. So hold off responding + * to insertion. + */ + while (1) { + if (time_is_before_jiffies(pwrup)) + break; + msleep(20); + } + + ret = qib_refresh_qsfp_cache(ppd, &qd->cache); + + /* + * Need to change LE2 back to defaults if we couldn't + * read the cable type (to handle cable swaps), so do this + * even on failure to read cable information. We don't + * get here for QME, so IS_QME check not needed here. + */ + if (!ret && !ppd->dd->cspec->r1) { + if (QSFP_IS_ACTIVE_FAR(qd->cache.tech)) + le2 = LE2_QME; + else if (qd->cache.atten[1] >= qib_long_atten && + QSFP_IS_CU(qd->cache.tech)) + le2 = LE2_5m; + else + le2 = LE2_DEFAULT; + } else + le2 = LE2_DEFAULT; + ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7)); + /* + * We always change parameteters, since we can choose + * values for cables without eeproms, and the cable may have + * changed from a cable with full or partial eeprom content + * to one with partial or no content. + */ + init_txdds_table(ppd, 0); + /* The physical link is being re-enabled only when the + * previous state was DISABLED and the VALID bit is not + * set. This should only happen when the cable has been + * physically pulled. */ + if (!ppd->cpspec->qsfp_data.modpresent && + (ppd->lflags & (QIBL_LINKV | QIBL_IB_LINK_DISABLED))) { + ppd->cpspec->qsfp_data.modpresent = 1; + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_SLEEP); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } } - ret = qib_refresh_qsfp_cache(ppd, &qd->cache); - /* - * Need to change LE2 back to defaults if we couldn't - * read the cable type (to handle cable swaps), so do this - * even on failure to read cable information. We don't - * get here for QME, so IS_QME check not needed here. - */ - le2 = (!ret && qd->cache.atten[1] >= qib_long_atten && - !ppd->dd->cspec->r1 && QSFP_IS_CU(qd->cache.tech)) ? - LE2_5m : LE2_DEFAULT; - ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7)); - init_txdds_table(ppd, 0); } /* @@ -5579,6 +6096,7 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change) u32 pidx, unit, port, deflt, h1; unsigned long val; int any = 0, seth1; + int txdds_size; str = txselect_list; @@ -5587,6 +6105,10 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change) for (pidx = 0; pidx < dd->num_pports; ++pidx) dd->pport[pidx].cpspec->no_eep = deflt; + txdds_size = TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ; + if (IS_QME(dd) || IS_QMH(dd)) + txdds_size += TXDDS_MFG_SZ; + while (*nxt && nxt[1]) { str = ++nxt; unit = simple_strtoul(str, &nxt, 0); @@ -5609,7 +6131,7 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change) ; continue; } - if (val >= TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ) + if (val >= txdds_size) continue; seth1 = 0; h1 = 0; /* gcc thinks it might be used uninitted */ @@ -5633,6 +6155,12 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change) ppd->cpspec->h1_val = h1; /* now change the IBC and serdes, overriding generic */ init_txdds_table(ppd, 1); + /* Re-enable the physical state machine on mezz boards + * now that the correct settings have been set. + * QSFP boards are handles by the QSFP event handler */ + if (IS_QMH(dd) || IS_QME(dd)) + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_SLEEP); any++; } if (*nxt == '\n') @@ -5656,15 +6184,14 @@ static int setup_txselect(const char *str, struct kernel_param *kp) unsigned long val; char *n; if (strlen(str) >= MAX_ATTEN_LEN) { - printk(KERN_INFO QIB_DRV_NAME " txselect_values string " - "too long\n"); + pr_info("txselect_values string too long\n"); return -ENOSPC; } val = simple_strtoul(str, &n, 0); - if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ)) { - printk(KERN_INFO QIB_DRV_NAME - "txselect_values must start with a number < %d\n", - TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ); + if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + + TXDDS_MFG_SZ)) { + pr_info("txselect_values must start with a number < %d\n", + TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); return -EINVAL; } strcpy(txselect_list, str); @@ -5691,11 +6218,10 @@ static int qib_late_7322_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } @@ -5810,7 +6336,8 @@ static void write_7322_initregs(struct qib_devdata *dd) unsigned n, regno; unsigned long flags; - if (!dd->qpn_mask || !dd->pport[pidx].link_speed_supported) + if (dd->n_krcv_queues < 2 || + !dd->pport[pidx].link_speed_supported) continue; ppd = &dd->pport[pidx]; @@ -5906,8 +6433,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ -6017,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd) } dd->num_pports++; - qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + if (ret) { + dd->num_pports--; + goto bail; + } ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_width_enabled = IB_WIDTH_4X; @@ -6073,9 +6604,9 @@ static int qib_init_7322_variables(struct qib_devdata *dd) */ if (!(dd->flags & QIB_HAS_QSFP)) { if (!IS_QMH(dd) && !IS_QME(dd)) - qib_devinfo(dd->pcidev, "IB%u:%u: " - "Unknown mezzanine card type\n", - dd->unit, ppd->port); + qib_devinfo(dd->pcidev, + "IB%u:%u: Unknown mezzanine card type\n", + dd->unit, ppd->port); cp->h1_val = IS_QMH(dd) ? H1_FORCE_QMH : H1_FORCE_QME; /* * Choose center value as default tx serdes setting @@ -6097,12 +6628,16 @@ static int qib_init_7322_variables(struct qib_devdata *dd) ppd++; } - dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE; - dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE; + dd->rcvhdrentsize = qib_rcvhdrentsize ? + qib_rcvhdrentsize : QIB_RCVHDR_ENTSIZE; + dd->rcvhdrsize = qib_rcvhdrsize ? + qib_rcvhdrsize : QIB_DFLT_RCVHDRSIZE; dd->rhf_offset = dd->rcvhdrentsize - sizeof(u64) / sizeof(u32); /* we always allocate at least 2048 bytes for eager buffers */ dd->rcvegrbufsize = max(mtu, 2048); + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7322_tidtemplate(dd); @@ -6143,8 +6678,10 @@ static int qib_init_7322_variables(struct qib_devdata *dd) dd->piobcnt4k * dd->align4k; dd->piovl15base = ioremap_nocache(vl15off, NUM_VL15_BUFS * dd->align4k); - if (!dd->piovl15base) + if (!dd->piovl15base) { + ret = -ENOMEM; goto bail; + } } qib_7322_set_baseaddrs(dd); /* set chip access pointers now */ @@ -6183,6 +6720,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) dd->cspec->sdmabufcnt; dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs; dd->cspec->lastbuf_for_pio--; /* range is <= , not < */ + dd->last_pio = dd->cspec->lastbuf_for_pio; dd->pbufsctxt = (dd->cfgctxts > dd->first_user_ctxt) ? dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt) : 0; @@ -6246,6 +6784,86 @@ static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt) qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt); } +/* + * sdma_lock should be acquired before calling this routine + */ +static void dump_sdma_7322_state(struct qib_pportdata *ppd) +{ + u64 reg, reg1, reg2; + + reg = qib_read_kreg_port(ppd, krp_senddmastatus); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmastatus: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_sendctrl); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sendctrl: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmabase); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmabase: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmabufmask0); + reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmabufmask 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + + /* get bufuse bits, clear them, and print them again if non-zero */ + reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg); + reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg2); + /* 0 and 1 should always be zero, so print as short form */ + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA current senddmabuf_use 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0); + reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2); + /* 0 and 1 should always be zero, so print as short form */ + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA cleared senddmabuf_use 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + + reg = qib_read_kreg_port(ppd, krp_senddmatail); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmatail: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmahead); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmahead: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmaheadaddr: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmalengen); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmalengen: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmadesccnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmadesccnt: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmaidlecnt: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmapriorityhld: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmareloadcnt: 0x%016llx\n", reg); + + dump_sdma_state(ppd); +} + static struct sdma_set_state_action sdma_7322_action_table[] = { [qib_sdma_state_s00_hw_down] = { .go_s99_running_tofalse = 1, @@ -6495,7 +7113,7 @@ static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start, /* make sure we see an updated copy next time around */ sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); sleeps++; - msleep(1); + msleep(20); } switch (which) { @@ -6679,6 +7297,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, dd->f_sdma_init_early = qib_7322_sdma_init_early; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_7322_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7322_notify_dca; +#endif /* * Do remaining PCIe setup and save PCIe values in dd. * Any error printing is already done by the init code. @@ -6710,21 +7331,23 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, (i >= ARRAY_SIZE(irq_table) && dd->rcd[i - ARRAY_SIZE(irq_table)])) actual_cnt++; + /* reduce by ctxt's < 2 */ + if (qib_krcvq01_no_msi) + actual_cnt -= dd->num_pports; + tabsize = actual_cnt; - dd->cspec->msix_entries = kmalloc(tabsize * - sizeof(struct msix_entry), GFP_KERNEL); - dd->cspec->msix_arg = kmalloc(tabsize * - sizeof(void *), GFP_KERNEL); - if (!dd->cspec->msix_entries || !dd->cspec->msix_arg) { + dd->cspec->msix_entries = kzalloc(tabsize * + sizeof(struct qib_msix_entry), GFP_KERNEL); + if (!dd->cspec->msix_entries) { qib_dev_err(dd, "No memory for MSIx table\n"); tabsize = 0; } for (i = 0; i < tabsize; i++) - dd->cspec->msix_entries[i].entry = i; + dd->cspec->msix_entries[i].msix.entry = i; if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); /* may be less than we wanted, if not enough available */ dd->cspec->num_msix_entries = tabsize; @@ -6733,7 +7356,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, /* clear diagctrl register, in case diags were running and crashed */ qib_write_kreg(dd, kr_hwdiagctrl, 0); - +#ifdef CONFIG_INFINIBAND_QIB_DCA + if (!dca_add_requester(&pdev->dev)) { + qib_devinfo(dd->pcidev, "DCA enabled\n"); + dd->flags |= QIB_DCA_ENABLED; + qib_setup_dca(dd); + } +#endif goto bail; bail_cleanup: @@ -6948,15 +7577,20 @@ static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 1 }, /* QMH7342 backplane settings */ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ { 0, 0, 0, 3 }, /* QMH7342 backplane settings */ { 0, 0, 0, 4 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { @@ -6965,15 +7599,20 @@ static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 7 }, /* QMH7342 backplane settings */ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ { 0, 0, 0, 9 }, /* QMH7342 backplane settings */ { 0, 0, 0, 10 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { @@ -6982,15 +7621,26 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { { 0, 1, 0, 5 }, /* QMH7342 backplane settings */ { 0, 1, 0, 6 }, /* QMH7342 backplane settings */ { 0, 1, 0, 8 }, /* QMH7342 backplane settings */ - { 0, 1, 12, 10 }, /* QME7342 backplane setting */ - { 0, 1, 12, 11 }, /* QME7342 backplane setting */ - { 0, 1, 12, 12 }, /* QME7342 backplane setting */ - { 0, 1, 12, 14 }, /* QME7342 backplane setting */ - { 0, 1, 12, 6 }, /* QME7342 backplane setting */ - { 0, 1, 12, 7 }, /* QME7342 backplane setting */ - { 0, 1, 12, 8 }, /* QME7342 backplane setting */ { 0, 1, 0, 10 }, /* QMH7342 backplane settings */ { 0, 1, 0, 12 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ +}; + +static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = { + /* amp, pre, main, post */ + { 0, 0, 0, 0 }, /* QME7342 mfg settings */ + { 0, 0, 0, 6 }, /* QME7342 P2 mfg settings */ }; static const struct txdds_ent *get_atten_table(const struct txdds_ent *txdds, @@ -7035,7 +7685,8 @@ static void find_best_ent(struct qib_pportdata *ppd, } } - /* Lookup serdes setting by cable type and attenuation */ + /* Active cables don't have attenuation so we only set SERDES + * settings to account for the attenuation of the board traces. */ if (!override && QSFP_IS_ACTIVE(qd->tech)) { *sdr_dds = txdds_sdr + ppd->dd->board_atten; *ddr_dds = txdds_ddr + ppd->dd->board_atten; @@ -7066,6 +7717,15 @@ static void find_best_ent(struct qib_pportdata *ppd, *sdr_dds = &txdds_extra_sdr[idx]; *ddr_dds = &txdds_extra_ddr[idx]; *qdr_dds = &txdds_extra_qdr[idx]; + } else if ((IS_QME(ppd->dd) || IS_QMH(ppd->dd)) && + ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + + TXDDS_MFG_SZ)) { + idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ); + pr_info("IB%u:%u use idx %u into txdds_mfg\n", + ppd->dd->unit, ppd->port, idx); + *sdr_dds = &txdds_extra_mfg[idx]; + *ddr_dds = &txdds_extra_mfg[idx]; + *qdr_dds = &txdds_extra_mfg[idx]; } else { /* this shouldn't happen, it's range checked */ *sdr_dds = txdds_sdr + qib_long_atten; @@ -7210,9 +7870,35 @@ static void ibsd_wr_allchans(struct qib_pportdata *ppd, int addr, unsigned data, } } +static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable) +{ + u64 data = qib_read_kreg_port(ppd, krp_serdesctrl); + u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN); + + if (enable && !state) { + pr_info("IB%u:%u Turning LOS on\n", + ppd->dd->unit, ppd->port); + data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN); + } else if (!enable && state) { + pr_info("IB%u:%u Turning LOS off\n", + ppd->dd->unit, ppd->port); + data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN); + } + qib_write_kreg_port(ppd, krp_serdesctrl, data); +} + static int serdes_7322_init(struct qib_pportdata *ppd) { - u64 data; + int ret = 0; + if (ppd->dd->cspec->r1) + ret = serdes_7322_init_old(ppd); + else + ret = serdes_7322_init_new(ppd); + return ret; +} + +static int serdes_7322_init_old(struct qib_pportdata *ppd) +{ u32 le_val; /* @@ -7270,11 +7956,7 @@ static int serdes_7322_init(struct qib_pportdata *ppd) ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */ ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */ - data = qib_read_kreg_port(ppd, krp_serdesctrl); - /* Turn off IB latency mode */ - data &= ~SYM_MASK(IBSerdesCtrl_0, IB_LAT_MODE); - qib_write_kreg_port(ppd, krp_serdesctrl, data | - SYM_MASK(IBSerdesCtrl_0, RXLOSEN)); + serdes_7322_los_enable(ppd, 1); /* rxbistena; set 0 to avoid effects of it switch later */ ibsd_wr_allchans(ppd, 9, 0 << 15, 1 << 15); @@ -7314,6 +7996,202 @@ static int serdes_7322_init(struct qib_pportdata *ppd) return 0; } +static int serdes_7322_init_new(struct qib_pportdata *ppd) +{ + unsigned long tend; + u32 le_val, rxcaldone; + int chan, chan_done = (1 << SERDES_CHANS) - 1; + + /* Clear cmode-override, may be set from older driver */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14); + + /* ensure no tx overrides from earlier driver loads */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + + /* START OF LSI SUGGESTED SERDES BRINGUP */ + /* Reset - Calibration Setup */ + /* Stop DFE adaptaion */ + ibsd_wr_allchans(ppd, 1, 0, BMASK(9, 1)); + /* Disable LE1 */ + ibsd_wr_allchans(ppd, 13, 0, BMASK(5, 5)); + /* Disable autoadapt for LE1 */ + ibsd_wr_allchans(ppd, 1, 0, BMASK(15, 15)); + /* Disable LE2 */ + ibsd_wr_allchans(ppd, 13, 0, BMASK(6, 6)); + /* Disable VGA */ + ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0)); + /* Disable AFE Offset Cancel */ + ibsd_wr_allchans(ppd, 12, 0, BMASK(12, 12)); + /* Disable Timing Loop */ + ibsd_wr_allchans(ppd, 2, 0, BMASK(3, 3)); + /* Disable Frequency Loop */ + ibsd_wr_allchans(ppd, 2, 0, BMASK(4, 4)); + /* Disable Baseline Wander Correction */ + ibsd_wr_allchans(ppd, 13, 0, BMASK(13, 13)); + /* Disable RX Calibration */ + ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10)); + /* Disable RX Offset Calibration */ + ibsd_wr_allchans(ppd, 12, 0, BMASK(4, 4)); + /* Select BB CDR */ + ibsd_wr_allchans(ppd, 2, (1 << 15), BMASK(15, 15)); + /* CDR Step Size */ + ibsd_wr_allchans(ppd, 5, 0, BMASK(9, 8)); + /* Enable phase Calibration */ + ibsd_wr_allchans(ppd, 12, (1 << 5), BMASK(5, 5)); + /* DFE Bandwidth [2:14-12] */ + ibsd_wr_allchans(ppd, 2, (4 << 12), BMASK(14, 12)); + /* DFE Config (4 taps only) */ + ibsd_wr_allchans(ppd, 16, 0, BMASK(1, 0)); + /* Gain Loop Bandwidth */ + if (!ppd->dd->cspec->r1) { + ibsd_wr_allchans(ppd, 12, 1 << 12, BMASK(12, 12)); + ibsd_wr_allchans(ppd, 12, 2 << 8, BMASK(11, 8)); + } else { + ibsd_wr_allchans(ppd, 19, (3 << 11), BMASK(13, 11)); + } + /* Baseline Wander Correction Gain [13:4-0] (leave as default) */ + /* Baseline Wander Correction Gain [3:7-5] (leave as default) */ + /* Data Rate Select [5:7-6] (leave as default) */ + /* RX Parallel Word Width [3:10-8] (leave as default) */ + + /* RX REST */ + /* Single- or Multi-channel reset */ + /* RX Analog reset */ + /* RX Digital reset */ + ibsd_wr_allchans(ppd, 0, 0, BMASK(15, 13)); + msleep(20); + /* RX Analog reset */ + ibsd_wr_allchans(ppd, 0, (1 << 14), BMASK(14, 14)); + msleep(20); + /* RX Digital reset */ + ibsd_wr_allchans(ppd, 0, (1 << 13), BMASK(13, 13)); + msleep(20); + + /* setup LoS params; these are subsystem, so chan == 5 */ + /* LoS filter threshold_count on, ch 0-3, set to 8 */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 5, 8 << 11, BMASK(14, 11)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 8 << 4, BMASK(7, 4)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 8, 8 << 11, BMASK(14, 11)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 8 << 4, BMASK(7, 4)); + + /* LoS filter threshold_count off, ch 0-3, set to 4 */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 6, 4 << 0, BMASK(3, 0)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 4 << 8, BMASK(11, 8)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 4 << 0, BMASK(3, 0)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 4 << 8, BMASK(11, 8)); + + /* LoS filter select enabled */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 1 << 15, 1 << 15); + + /* LoS target data: SDR=4, DDR=2, QDR=1 */ + ibsd_wr_allchans(ppd, 14, (1 << 3), BMASK(5, 3)); /* QDR */ + ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */ + ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */ + + /* Turn on LOS on initial SERDES init */ + serdes_7322_los_enable(ppd, 1); + /* FLoop LOS gate: PPM filter enabled */ + ibsd_wr_allchans(ppd, 38, 0 << 10, 1 << 10); + + /* RX LATCH CALIBRATION */ + /* Enable Eyefinder Phase Calibration latch */ + ibsd_wr_allchans(ppd, 15, 1, BMASK(0, 0)); + /* Enable RX Offset Calibration latch */ + ibsd_wr_allchans(ppd, 12, (1 << 4), BMASK(4, 4)); + msleep(20); + /* Start Calibration */ + ibsd_wr_allchans(ppd, 4, (1 << 10), BMASK(10, 10)); + tend = jiffies + msecs_to_jiffies(500); + while (chan_done && !time_is_before_jiffies(tend)) { + msleep(20); + for (chan = 0; chan < SERDES_CHANS; ++chan) { + rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), + (chan + (chan >> 1)), + 25, 0, 0); + if ((~rxcaldone & (u32)BMASK(9, 9)) == 0 && + (~chan_done & (1 << chan)) == 0) + chan_done &= ~(1 << chan); + } + } + if (chan_done) { + pr_info("Serdes %d calibration not done after .5 sec: 0x%x\n", + IBSD(ppd->hw_pidx), chan_done); + } else { + for (chan = 0; chan < SERDES_CHANS; ++chan) { + rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), + (chan + (chan >> 1)), + 25, 0, 0); + if ((~rxcaldone & (u32)BMASK(10, 10)) == 0) + pr_info("Serdes %d chan %d calibration failed\n", + IBSD(ppd->hw_pidx), chan); + } + } + + /* Turn off Calibration */ + ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10)); + msleep(20); + + /* BRING RX UP */ + /* Set LE2 value (May be overridden in qsfp_7322_event) */ + le_val = IS_QME(ppd->dd) ? LE2_QME : LE2_DEFAULT; + ibsd_wr_allchans(ppd, 13, (le_val << 7), BMASK(9, 7)); + /* Set LE2 Loop bandwidth */ + ibsd_wr_allchans(ppd, 3, (7 << 5), BMASK(7, 5)); + /* Enable LE2 */ + ibsd_wr_allchans(ppd, 13, (1 << 6), BMASK(6, 6)); + msleep(20); + /* Enable H0 only */ + ibsd_wr_allchans(ppd, 1, 1, BMASK(9, 1)); + /* gain hi stop 32 (22) (6:1) lo stop 7 (10:7) target 22 (13) (15:11) */ + le_val = (ppd->dd->cspec->r1 || IS_QME(ppd->dd)) ? 0xb6c0 : 0x6bac; + ibsd_wr_allchans(ppd, 21, le_val, 0xfffe); + /* Enable VGA */ + ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0)); + msleep(20); + /* Set Frequency Loop Bandwidth */ + ibsd_wr_allchans(ppd, 2, (15 << 5), BMASK(8, 5)); + /* Enable Frequency Loop */ + ibsd_wr_allchans(ppd, 2, (1 << 4), BMASK(4, 4)); + /* Set Timing Loop Bandwidth */ + ibsd_wr_allchans(ppd, 2, 0, BMASK(11, 9)); + /* Enable Timing Loop */ + ibsd_wr_allchans(ppd, 2, (1 << 3), BMASK(3, 3)); + msleep(50); + /* Enable DFE + * Set receive adaptation mode. SDR and DDR adaptation are + * always on, and QDR is initially enabled; later disabled. + */ + qib_write_kreg_port(ppd, krp_static_adapt_dis(0), 0ULL); + qib_write_kreg_port(ppd, krp_static_adapt_dis(1), 0ULL); + qib_write_kreg_port(ppd, krp_static_adapt_dis(2), + ppd->dd->cspec->r1 ? + QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN); + ppd->cpspec->qdr_dfe_on = 1; + /* Disable LE1 */ + ibsd_wr_allchans(ppd, 13, (0 << 5), (1 << 5)); + /* Disable auto adapt for LE1 */ + ibsd_wr_allchans(ppd, 1, (0 << 15), BMASK(15, 15)); + msleep(20); + /* Enable AFE Offset Cancel */ + ibsd_wr_allchans(ppd, 12, (1 << 12), BMASK(12, 12)); + /* Enable Baseline Wander Correction */ + ibsd_wr_allchans(ppd, 12, (1 << 13), BMASK(13, 13)); + /* Termination: rxtermctrl_r2d addr 11 bits [12:11] = 1 */ + ibsd_wr_allchans(ppd, 11, (1 << 11), BMASK(12, 11)); + /* VGA output common mode */ + ibsd_wr_allchans(ppd, 12, (3 << 2), BMASK(3, 2)); + + /* + * Initialize the Tx DDS tables. Also done every QSFP event, + * for adapters with QSFP + */ + init_txdds_table(ppd, 0); + + return 0; +} + /* start adjust QMH serdes parameters */ static void set_man_code(struct qib_pportdata *ppd, int chan, int code) diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index f3b50393604..8d3c78ddc90 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -37,9 +37,22 @@ #include <linux/vmalloc.h> #include <linux/delay.h> #include <linux/idr.h> +#include <linux/module.h> +#include <linux/printk.h> +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include <linux/dca.h> +#endif #include "qib.h" #include "qib_common.h" +#include "qib_mad.h" +#ifdef CONFIG_DEBUG_FS +#include "qib_debugfs.h" +#include "qib_verbs.h" +#endif + +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt /* * min buffers we want to have per context, after driver @@ -58,6 +71,11 @@ ushort qib_cfgctxts; module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); +unsigned qib_numa_aware; +module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO); +MODULE_PARM_DESC(numa_aware, + "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process"); + /* * If set, do not write to any regs if avoidable, hack to allow * check for deranged default register values. @@ -70,6 +88,9 @@ unsigned qib_n_krcv_queues; module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); +unsigned qib_cc_table_size; +module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO); +MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984"); /* * qib_wc_pat parameter: * 0 is WC via MTRR @@ -80,9 +101,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); -struct workqueue_struct *qib_wq; -struct workqueue_struct *qib_cq_wq; - static void verify_interrupt(unsigned long); static struct idr qib_unit_table; @@ -92,14 +110,18 @@ unsigned long *qib_cpulist; /* set number of contexts we'll actually use */ void qib_set_ctxtcnt(struct qib_devdata *dd) { - if (!qib_cfgctxts) + if (!qib_cfgctxts) { dd->cfgctxts = dd->first_user_ctxt + num_online_cpus(); - else if (qib_cfgctxts < dd->num_pports) + if (dd->cfgctxts > dd->ctxtcnt) + dd->cfgctxts = dd->ctxtcnt; + } else if (qib_cfgctxts < dd->num_pports) dd->cfgctxts = dd->ctxtcnt; else if (qib_cfgctxts <= dd->ctxtcnt) dd->cfgctxts = qib_cfgctxts; else dd->cfgctxts = dd->ctxtcnt; + dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 : + dd->cfgctxts - dd->first_user_ctxt; } /* @@ -108,7 +130,11 @@ void qib_set_ctxtcnt(struct qib_devdata *dd) int qib_create_ctxts(struct qib_devdata *dd) { unsigned i; - int ret; + int local_node_id = pcibus_to_node(dd->pcidev->bus); + + if (local_node_id < 0) + local_node_id = numa_node_id(); + dd->assigned_node_id = local_node_id; /* * Allocate full ctxtcnt array, rather than just cfgctxts, because @@ -116,10 +142,9 @@ int qib_create_ctxts(struct qib_devdata *dd) */ dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL); if (!dd->rcd) { - qib_dev_err(dd, "Unable to allocate ctxtdata array, " - "failing\n"); - ret = -ENOMEM; - goto done; + qib_dev_err(dd, + "Unable to allocate ctxtdata array, failing\n"); + return -ENOMEM; } /* create (one or more) kctxt */ @@ -131,38 +156,51 @@ int qib_create_ctxts(struct qib_devdata *dd) continue; ppd = dd->pport + (i % dd->num_pports); - rcd = qib_create_ctxtdata(ppd, i); + + rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id); if (!rcd) { - qib_dev_err(dd, "Unable to allocate ctxtdata" - " for Kernel ctxt, failing\n"); - ret = -ENOMEM; - goto done; + qib_dev_err(dd, + "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); + kfree(dd->rcd); + dd->rcd = NULL; + return -ENOMEM; } rcd->pkeys[0] = QIB_DEFAULT_P_KEY; rcd->seq_cnt = 1; } - ret = 0; -done: - return ret; + return 0; } /* * Common code for user and kernel context setup. */ -struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, + int node_id) { struct qib_devdata *dd = ppd->dd; struct qib_ctxtdata *rcd; - rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); + rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id); if (rcd) { INIT_LIST_HEAD(&rcd->qp_wait_list); + rcd->node_id = node_id; rcd->ppd = ppd; rcd->dd = dd; rcd->cnt = 1; rcd->ctxt = ctxt; dd->rcd[ctxt] = rcd; - +#ifdef CONFIG_DEBUG_FS + if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ + rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), + GFP_KERNEL, node_id); + if (!rcd->opstats) { + kfree(rcd); + qib_dev_err(dd, + "Unable to allocate per ctxt stats buffer\n"); + return NULL; + } + } +#endif dd->f_init_ctxt(rcd); /* @@ -182,6 +220,9 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt + rcd->rcvegrbufs_perchunk - 1) / rcd->rcvegrbufs_perchunk; + BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk)); + rcd->rcvegrbufs_perchunk_shift = + ilog2(rcd->rcvegrbufs_perchunk); } return rcd; } @@ -189,20 +230,101 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) /* * Common code for initializing the physical port structure. */ -void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, +int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, u8 hw_pidx, u8 port) { + int size; ppd->dd = dd; ppd->hw_pidx = hw_pidx; ppd->port = port; /* IB port number, not index */ spin_lock_init(&ppd->sdma_lock); spin_lock_init(&ppd->lflags_lock); + spin_lock_init(&ppd->cc_shadow_lock); init_waitqueue_head(&ppd->state_wait); init_timer(&ppd->symerr_clear_timer); ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup; ppd->symerr_clear_timer.data = (unsigned long)ppd; + + ppd->qib_wq = NULL; + ppd->ibport_data.pmastats = + alloc_percpu(struct qib_pma_counters); + if (!ppd->ibport_data.pmastats) + return -ENOMEM; + + if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) + goto bail; + + ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size, + IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT); + + ppd->cc_max_table_entries = + ppd->cc_supported_table_entries/IB_CCT_ENTRIES; + + size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry) + * IB_CCT_ENTRIES; + ppd->ccti_entries = kzalloc(size, GFP_KERNEL); + if (!ppd->ccti_entries) { + qib_dev_err(dd, + "failed to allocate congestion control table for port %d!\n", + port); + goto bail; + } + + size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry); + ppd->congestion_entries = kzalloc(size, GFP_KERNEL); + if (!ppd->congestion_entries) { + qib_dev_err(dd, + "failed to allocate congestion setting list for port %d!\n", + port); + goto bail_1; + } + + size = sizeof(struct cc_table_shadow); + ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL); + if (!ppd->ccti_entries_shadow) { + qib_dev_err(dd, + "failed to allocate shadow ccti list for port %d!\n", + port); + goto bail_2; + } + + size = sizeof(struct ib_cc_congestion_setting_attr); + ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL); + if (!ppd->congestion_entries_shadow) { + qib_dev_err(dd, + "failed to allocate shadow congestion setting list for port %d!\n", + port); + goto bail_3; + } + + return 0; + +bail_3: + kfree(ppd->ccti_entries_shadow); + ppd->ccti_entries_shadow = NULL; +bail_2: + kfree(ppd->congestion_entries); + ppd->congestion_entries = NULL; +bail_1: + kfree(ppd->ccti_entries); + ppd->ccti_entries = NULL; +bail: + /* User is intentionally disabling the congestion control agent */ + if (!qib_cc_table_size) + return 0; + + if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) { + qib_cc_table_size = 0; + qib_dev_err(dd, + "Congestion Control table size %d less than minimum %d for port %d\n", + qib_cc_table_size, IB_CCT_MIN_ENTRIES, port); + } + + qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n", + port); + return 0; } static int init_pioavailregs(struct qib_devdata *dd) @@ -214,8 +336,8 @@ static int init_pioavailregs(struct qib_devdata *dd) &dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys, GFP_KERNEL); if (!dd->pioavailregs_dma) { - qib_dev_err(dd, "failed to allocate PIOavail reg area " - "in memory\n"); + qib_dev_err(dd, + "failed to allocate PIOavail reg area in memory\n"); ret = -ENOMEM; goto done; } @@ -268,23 +390,20 @@ static void init_shadow_tids(struct qib_devdata *dd) struct page **pages; dma_addr_t *addrs; - pages = vmalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); + pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); if (!pages) { - qib_dev_err(dd, "failed to allocate shadow page * " - "array, no expected sends!\n"); + qib_dev_err(dd, + "failed to allocate shadow page * array, no expected sends!\n"); goto bail; } - addrs = vmalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); + addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); if (!addrs) { - qib_dev_err(dd, "failed to allocate shadow dma handle " - "array, no expected sends!\n"); + qib_dev_err(dd, + "failed to allocate shadow dma handle array, no expected sends!\n"); goto bail_free; } - memset(pages, 0, dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); - memset(addrs, 0, dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); - dd->pageshadow = pages; dd->physshadow = addrs; return; @@ -305,13 +424,13 @@ static int loadtime_init(struct qib_devdata *dd) if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) { - qib_dev_err(dd, "Driver only handles version %d, " - "chip swversion is %d (%llx), failng\n", - QIB_CHIP_SWVERSION, - (int)(dd->revision >> + qib_dev_err(dd, + "Driver only handles version %d, chip swversion is %d (%llx), failng\n", + QIB_CHIP_SWVERSION, + (int)(dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & - QLOGIC_IB_R_SOFTWARE_MASK, - (unsigned long long) dd->revision); + QLOGIC_IB_R_SOFTWARE_MASK, + (unsigned long long) dd->revision); ret = -ENOSYS; goto done; } @@ -339,6 +458,7 @@ static int loadtime_init(struct qib_devdata *dd) dd->intrchk_timer.function = verify_interrupt; dd->intrchk_timer.data = (unsigned long) dd; + ret = qib_cq_init(dd); done: return ret; } @@ -348,7 +468,7 @@ done: * @dd: the qlogic_ib device * * sanity check at least some of the values after reset, and - * ensure no receive or transmit (explictly, in case reset + * ensure no receive or transmit (explicitly, in case reset * failed */ static int init_after_reset(struct qib_devdata *dd) @@ -405,6 +525,7 @@ static void enable_chip(struct qib_devdata *dd) static void verify_interrupt(unsigned long opaque) { struct qib_devdata *dd = (struct qib_devdata *) opaque; + u64 int_counter; if (!dd) return; /* being torn down */ @@ -413,10 +534,11 @@ static void verify_interrupt(unsigned long opaque) * If we don't have a lid or any interrupts, let the user know and * don't bother checking again. */ - if (dd->int_counter == 0) { + int_counter = qib_int_counter(dd) - dd->z_int_counter; + if (int_counter == 0) { if (!dd->f_intr_fallback(dd)) - dev_err(&dd->pcidev->dev, "No interrupts detected, " - "not usable.\n"); + dev_err(&dd->pcidev->dev, + "No interrupts detected, not usable.\n"); else /* re-arm the timer to see if fallback works */ mod_timer(&dd->intrchk_timer, jiffies + HZ/2); } @@ -479,6 +601,47 @@ static void init_piobuf_state(struct qib_devdata *dd) } /** + * qib_create_workqueues - create per port workqueues + * @dd: the qlogic_ib device + */ +static int qib_create_workqueues(struct qib_devdata *dd) +{ + int pidx; + struct qib_pportdata *ppd; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (!ppd->qib_wq) { + char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */ + snprintf(wq_name, sizeof(wq_name), "qib%d_%d", + dd->unit, pidx); + ppd->qib_wq = + create_singlethread_workqueue(wq_name); + if (!ppd->qib_wq) + goto wq_error; + } + } + return 0; +wq_error: + pr_err("create_singlethread_workqueue failed for port %d\n", + pidx + 1); + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (ppd->qib_wq) { + destroy_workqueue(ppd->qib_wq); + ppd->qib_wq = NULL; + } + } + return -ENOMEM; +} + +static void qib_free_pportdata(struct qib_pportdata *ppd) +{ + free_percpu(ppd->ibport_data.pmastats); + ppd->ibport_data.pmastats = NULL; +} + +/** * qib_init - do the actual initialization sequence on the chip * @dd: the qlogic_ib device * @reinit: reinitializing, so don't allocate new memory @@ -543,8 +706,8 @@ int qib_init(struct qib_devdata *dd, int reinit) if (!lastfail) lastfail = qib_setup_eagerbufs(rcd); if (lastfail) { - qib_dev_err(dd, "failed to allocate kernel ctxt's " - "rcvhdrq and/or egr bufs\n"); + qib_dev_err(dd, + "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); continue; } } @@ -583,10 +746,6 @@ int qib_init(struct qib_devdata *dd, int reinit) continue; } - /* let link come up, and enable IBC */ - spin_lock_irqsave(&ppd->lflags_lock, flags); - ppd->lflags &= ~QIBL_IB_LINK_DISABLED; - spin_unlock_irqrestore(&ppd->lflags_lock, flags); portok++; } @@ -764,6 +923,12 @@ static void qib_shutdown_device(struct qib_devdata *dd) * We can't count on interrupts since we are stopping. */ dd->f_quiet_serdes(ppd); + + if (ppd->qib_wq) { + destroy_workqueue(ppd->qib_wq); + ppd->qib_wq = NULL; + } + qib_free_pportdata(ppd); } qib_update_eeprom_log(dd); @@ -818,6 +983,10 @@ void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd) vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); +#ifdef CONFIG_DEBUG_FS + kfree(rcd->opstats); + rcd->opstats = NULL; +#endif kfree(rcd); } @@ -893,8 +1062,7 @@ static void qib_verify_pioperf(struct qib_devdata *dd) /* 1 GiB/sec, slightly over IB SDR line rate */ if (lcnt < (emsecs * 1024U)) qib_dev_err(dd, - "Performance problem: bandwidth to PIO buffers is " - "only %u MiB/sec\n", + "Performance problem: bandwidth to PIO buffers is only %u MiB/sec\n", lcnt / (u32) emsecs); preempt_enable(); @@ -908,7 +1076,6 @@ done: dd->f_set_armlaunch(dd, 1); } - void qib_free_devdata(struct qib_devdata *dd) { unsigned long flags; @@ -918,9 +1085,37 @@ void qib_free_devdata(struct qib_devdata *dd) list_del(&dd->list); spin_unlock_irqrestore(&qib_devs_lock, flags); +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_exit(&dd->verbs_dev); +#endif + free_percpu(dd->int_counter); ib_dealloc_device(&dd->verbs_dev.ibdev); } +u64 qib_int_counter(struct qib_devdata *dd) +{ + int cpu; + u64 int_counter = 0; + + for_each_possible_cpu(cpu) + int_counter += *per_cpu_ptr(dd->int_counter, cpu); + return int_counter; +} + +u64 qib_sps_ints(void) +{ + unsigned long flags; + struct qib_devdata *dd; + u64 sps_ints = 0; + + spin_lock_irqsave(&qib_devs_lock, flags); + list_for_each_entry(dd, &qib_dev_list, list) { + sps_ints += qib_int_counter(dd); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + return sps_ints; +} + /* * Allocate our primary per-unit data structure. Must be done via verbs * allocator, because the verbs cleanup process both does cleanup and @@ -935,28 +1130,34 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) struct qib_devdata *dd; int ret; - if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } - dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); - if (!dd) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } + if (!dd) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&dd->list); + + idr_preload(GFP_KERNEL); spin_lock_irqsave(&qib_devs_lock, flags); - ret = idr_get_new(&qib_unit_table, dd, &dd->unit); - if (ret >= 0) + + ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT); + if (ret >= 0) { + dd->unit = ret; list_add(&dd->list, &qib_dev_list); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + idr_preload_end(); if (ret < 0) { qib_early_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); - ib_dealloc_device(&dd->verbs_dev.ibdev); - dd = ERR_PTR(ret); + goto bail; + } + dd->int_counter = alloc_percpu(u64); + if (!dd->int_counter) { + ret = -ENOMEM; + qib_early_err(&pdev->dev, + "Could not allocate per-cpu int_counter\n"); goto bail; } @@ -967,12 +1168,18 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (qib_cpulist) qib_cpulist_count = count; else - qib_early_err(&pdev->dev, "Could not alloc cpulist " - "info, cpu affinity might be wrong\n"); + qib_early_err(&pdev->dev, + "Could not alloc cpulist info, cpu affinity might be wrong\n"); } - -bail: +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_init(&dd->verbs_dev); +#endif return dd; +bail: + if (!list_empty(&dd->list)) + list_del_init(&dd->list); + ib_dealloc_device(&dd->verbs_dev.ibdev); + return ERR_PTR(ret);; } /* @@ -1009,14 +1216,13 @@ void qib_disable_after_error(struct qib_devdata *dd) *dd->devstatusp |= QIB_STATUS_HWERROR; } -static void __devexit qib_remove_one(struct pci_dev *); -static int __devinit qib_init_one(struct pci_dev *, - const struct pci_device_id *); +static void qib_remove_one(struct pci_dev *); +static int qib_init_one(struct pci_dev *, const struct pci_device_id *); -#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: " +#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: " #define PFX QIB_DRV_NAME ": " -static const struct pci_device_id qib_pci_tbl[] = { +static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) }, @@ -1025,19 +1231,48 @@ static const struct pci_device_id qib_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, qib_pci_tbl); -struct pci_driver qib_driver = { +static struct pci_driver qib_driver = { .name = QIB_DRV_NAME, .probe = qib_init_one, - .remove = __devexit_p(qib_remove_one), + .remove = qib_remove_one, .id_table = qib_pci_tbl, .err_handler = &qib_pci_err_handler, }; +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_notify_dca(struct notifier_block *, unsigned long, void *); +static struct notifier_block dca_notifier = { + .notifier_call = qib_notify_dca, + .next = NULL, + .priority = 0 +}; + +static int qib_notify_dca_device(struct device *device, void *data) +{ + struct qib_devdata *dd = dev_get_drvdata(device); + unsigned long event = *(unsigned long *)data; + + return dd->f_notify_dca(dd, event); +} + +static int qib_notify_dca(struct notifier_block *nb, unsigned long event, + void *p) +{ + int rval; + + rval = driver_for_each_device(&qib_driver.driver, NULL, + &event, qib_notify_dca_device); + return rval ? NOTIFY_BAD : NOTIFY_DONE; +} + +#endif + /* * Do all the generic driver unit- and chip-independent memory * allocation and initialization. */ -static int __init qlogic_ib_init(void) +static int __init qib_ib_init(void) { int ret; @@ -1046,79 +1281,63 @@ static int __init qlogic_ib_init(void) goto bail; /* - * We create our own workqueue mainly because we want to be - * able to flush it when devices are being removed. We can't - * use schedule_work()/flush_scheduled_work() because both - * unregister_netdev() and linkwatch_event take the rtnl lock, - * so flush_scheduled_work() can deadlock during device - * removal. - */ - qib_wq = create_workqueue("qib"); - if (!qib_wq) { - ret = -ENOMEM; - goto bail_dev; - } - - qib_cq_wq = create_singlethread_workqueue("qib_cq"); - if (!qib_cq_wq) { - ret = -ENOMEM; - goto bail_wq; - } - - /* * These must be called before the driver is registered with * the PCI subsystem. */ idr_init(&qib_unit_table); - if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - printk(KERN_ERR QIB_DRV_NAME ": idr_pre_get() failed\n"); - ret = -ENOMEM; - goto bail_cq_wq; - } +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_register_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_init(); +#endif ret = pci_register_driver(&qib_driver); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME - ": Unable to register driver: error %d\n", -ret); - goto bail_unit; + pr_err("Unable to register driver: error %d\n", -ret); + goto bail_dev; } /* not fatal if it doesn't work */ if (qib_init_qibfs()) - printk(KERN_ERR QIB_DRV_NAME ": Unable to register ipathfs\n"); + pr_err("Unable to register ipathfs\n"); goto bail; /* all OK */ -bail_unit: - idr_destroy(&qib_unit_table); -bail_cq_wq: - destroy_workqueue(qib_cq_wq); -bail_wq: - destroy_workqueue(qib_wq); bail_dev: +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); +#endif + idr_destroy(&qib_unit_table); qib_dev_cleanup(); bail: return ret; } -module_init(qlogic_ib_init); +module_init(qib_ib_init); /* * Do the non-unit driver cleanup, memory free, etc. at unload. */ -static void __exit qlogic_ib_cleanup(void) +static void __exit qib_ib_cleanup(void) { int ret; ret = qib_exit_qibfs(); if (ret) - printk(KERN_ERR QIB_DRV_NAME ": " - "Unable to cleanup counter filesystem: " - "error %d\n", -ret); + pr_err( + "Unable to cleanup counter filesystem: error %d\n", + -ret); +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif pci_unregister_driver(&qib_driver); - - destroy_workqueue(qib_wq); - destroy_workqueue(qib_cq_wq); +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); +#endif qib_cpulist_count = 0; kfree(qib_cpulist); @@ -1127,7 +1346,7 @@ static void __exit qlogic_ib_cleanup(void) qib_dev_cleanup(); } -module_exit(qlogic_ib_cleanup); +module_exit(qib_ib_cleanup); /* this can only be called after a successful initialization */ static void cleanup_device_data(struct qib_devdata *dd) @@ -1138,10 +1357,24 @@ static void cleanup_device_data(struct qib_devdata *dd) unsigned long flags; /* users can't do anything more with chip */ - for (pidx = 0; pidx < dd->num_pports; ++pidx) + for (pidx = 0; pidx < dd->num_pports; ++pidx) { if (dd->pport[pidx].statusp) *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT; + spin_lock(&dd->pport[pidx].cc_shadow_lock); + + kfree(dd->pport[pidx].congestion_entries); + dd->pport[pidx].congestion_entries = NULL; + kfree(dd->pport[pidx].ccti_entries); + dd->pport[pidx].ccti_entries = NULL; + kfree(dd->pport[pidx].ccti_entries_shadow); + dd->pport[pidx].ccti_entries_shadow = NULL; + kfree(dd->pport[pidx].congestion_entries_shadow); + dd->pport[pidx].congestion_entries_shadow = NULL; + + spin_unlock(&dd->pport[pidx].cc_shadow_lock); + } + if (!qib_wc_pat) qib_disable_wc(dd); @@ -1155,7 +1388,7 @@ static void cleanup_device_data(struct qib_devdata *dd) if (dd->pageshadow) { struct page **tmpp = dd->pageshadow; dma_addr_t *tmpd = dd->physshadow; - int i, cnt = 0; + int i; for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) { int ctxt_tidbase = ctxt * dd->rcvtidcnt; @@ -1168,13 +1401,13 @@ static void cleanup_device_data(struct qib_devdata *dd) PAGE_SIZE, PCI_DMA_FROMDEVICE); qib_release_user_pages(&tmpp[i], 1); tmpp[i] = NULL; - cnt++; } } - tmpp = dd->pageshadow; dd->pageshadow = NULL; vfree(tmpp); + dd->physshadow = NULL; + vfree(tmpd); } /* @@ -1196,6 +1429,7 @@ static void cleanup_device_data(struct qib_devdata *dd) } kfree(tmp); kfree(dd->boardname); + qib_cq_exit(dd); } /* @@ -1221,8 +1455,7 @@ static void qib_postinit_cleanup(struct qib_devdata *dd) qib_free_devdata(dd); } -static int __devinit qib_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret, j, pidx, initfail; struct qib_devdata *dd = NULL; @@ -1240,9 +1473,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, #ifdef CONFIG_PCI_MSI dd = qib_init_iba6120_funcs(pdev, ent); #else - qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot " - "work if CONFIG_PCI_MSI is not enabled\n", - ent->device); + qib_early_err(&pdev->dev, + "Intel PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", + ent->device); dd = ERR_PTR(-ENODEV); #endif break; @@ -1256,8 +1489,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, break; default: - qib_early_err(&pdev->dev, "Failing on unknown QLogic " - "deviceid 0x%x\n", ent->device); + qib_early_err(&pdev->dev, + "Failing on unknown Intel deviceid 0x%x\n", + ent->device); ret = -ENODEV; } @@ -1266,6 +1500,10 @@ static int __devinit qib_init_one(struct pci_dev *pdev, if (ret) goto bail; /* error already printed */ + ret = qib_create_workqueues(dd); + if (ret) + goto bail; + /* do the generic initialization */ initfail = qib_init(dd, 0); @@ -1290,7 +1528,7 @@ static int __devinit qib_init_one(struct pci_dev *pdev, if (qib_mini_init || initfail || ret) { qib_stop_timers(dd); - flush_scheduled_work(); + flush_workqueue(ib_wq); for (pidx = 0; pidx < dd->num_pports; ++pidx) dd->f_quiet_serdes(dd->pport + pidx); if (qib_mini_init) @@ -1310,9 +1548,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, if (!qib_wc_pat) { ret = qib_enable_wc(dd); if (ret) { - qib_dev_err(dd, "Write combining not enabled " - "(err %d): performance may be poor\n", - -ret); + qib_dev_err(dd, + "Write combining not enabled (err %d): performance may be poor\n", + -ret); ret = 0; } } @@ -1322,7 +1560,7 @@ bail: return ret; } -static void __devexit qib_remove_one(struct pci_dev *pdev) +static void qib_remove_one(struct pci_dev *pdev) { struct qib_devdata *dd = pci_get_drvdata(pdev); int ret; @@ -1339,8 +1577,8 @@ static void __devexit qib_remove_one(struct pci_dev *pdev) qib_stop_timers(dd); - /* wait until all of our (qsfp) schedule_work() calls complete */ - flush_scheduled_work(); + /* wait until all of our (qsfp) queue_work() calls complete */ + flush_workqueue(ib_wq); ret = qibfs_remove(dd); if (ret) @@ -1364,6 +1602,7 @@ static void __devexit qib_remove_one(struct pci_dev *pdev) int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) { unsigned amt; + int old_node_id; if (!rcd->rcvhdrq) { dma_addr_t phys_hdrqtail; @@ -1373,14 +1612,18 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) sizeof(u32), PAGE_SIZE); gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? GFP_USER : GFP_KERNEL; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvhdrq = dma_alloc_coherent( &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, gfp_flags | __GFP_COMP); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvhdrq) { - qib_dev_err(dd, "attempt to allocate %d bytes " - "for ctxt %u rcvhdrq failed\n", - amt, rcd->ctxt); + qib_dev_err(dd, + "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n", + amt, rcd->ctxt); goto bail; } @@ -1391,9 +1634,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) } if (!(dd->flags & QIB_NODMA_RTAIL)) { + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvhdrtail_kvaddr = dma_alloc_coherent( &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; @@ -1409,8 +1654,9 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) return 0; bail_free: - qib_dev_err(dd, "attempt to allocate 1 page for ctxt %u " - "rcvhdrqtailaddr failed\n", rcd->ctxt); + qib_dev_err(dd, + "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", + rcd->ctxt); vfree(rcd->user_event_mask); rcd->user_event_mask = NULL; bail_free_hdrq: @@ -1436,6 +1682,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; size_t size; gfp_t gfp_flags; + int old_node_id; /* * GFP_USER, but without GFP_FS, so buffer cache can be @@ -1454,25 +1701,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) size = rcd->rcvegrbuf_size; if (!rcd->rcvegrbuf) { rcd->rcvegrbuf = - kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]), - GFP_KERNEL); + kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]), + GFP_KERNEL, rcd->node_id); if (!rcd->rcvegrbuf) goto bail; } if (!rcd->rcvegrbuf_phys) { rcd->rcvegrbuf_phys = - kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]), - GFP_KERNEL); + kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]), + GFP_KERNEL, rcd->node_id); if (!rcd->rcvegrbuf_phys) goto bail_rcvegrbuf; } for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { if (rcd->rcvegrbuf[e]) continue; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvegrbuf[e] = dma_alloc_coherent(&dd->pcidev->dev, size, &rcd->rcvegrbuf_phys[e], gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvegrbuf[e]) goto bail_rcvegrbuf_phys; } diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c index 54a40828a10..f4918f2165e 100644 --- a/drivers/infiniband/hw/qib/qib_intr.c +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -96,8 +96,12 @@ void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs) * states, or if it transitions from any of the up (INIT or better) * states into any of the down states (except link recovery), then * call the chip-specific code to take appropriate actions. + * + * ppd->lflags could be 0 if this is the first time the interrupt + * handlers has been called but the link is already up. */ - if (lstate >= IB_PORT_INIT && (ppd->lflags & QIBL_LINKDOWN) && + if (lstate >= IB_PORT_INIT && + (!ppd->lflags || (ppd->lflags & QIBL_LINKDOWN)) && ltstate == IB_PHYSPORTSTATE_LINKUP) { /* transitioned to UP */ if (dd->f_ib_updown(ppd, 1, ibcs)) @@ -131,7 +135,8 @@ void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs) /* start a 75msec timer to clear symbol errors */ mod_timer(&ppd->symerr_clear_timer, msecs_to_jiffies(75)); - } else if (ltstate == IB_PHYSPORTSTATE_LINKUP) { + } else if (ltstate == IB_PHYSPORTSTATE_LINKUP && + !(ppd->lflags & QIBL_LINKACTIVE)) { /* active, but not active defered */ qib_hol_up(ppd); /* useful only for 6120 now */ *ppd->statusp |= @@ -219,15 +224,15 @@ void qib_bad_intrstatus(struct qib_devdata *dd) * We print the message and disable interrupts, in hope of * having a better chance of debugging the problem. */ - qib_dev_err(dd, "Read of chip interrupt status failed" - " disabling interrupts\n"); + qib_dev_err(dd, + "Read of chip interrupt status failed disabling interrupts\n"); if (allbits++) { /* disable interrupt delivery, something is very wrong */ if (allbits == 2) dd->f_set_intr_state(dd, 0); if (allbits == 3) { - qib_dev_err(dd, "2nd bad interrupt status, " - "unregistering interrupts\n"); + qib_dev_err(dd, + "2nd bad interrupt status, unregistering interrupts\n"); dd->flags |= QIB_BADINTR; dd->flags &= ~QIB_INITTED; dd->f_free_irq(dd); diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 4b80eb153d5..3b9afccaaad 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -35,21 +35,41 @@ /** * qib_alloc_lkey - allocate an lkey - * @rkt: lkey table in which to allocate the lkey * @mr: memory region that this lkey protects + * @dma_region: 0->normal key, 1->restricted DMA key + * + * Returns 0 if successful, otherwise returns -errno. + * + * Increments mr reference count as required. + * + * Sets the lkey field mr for non-dma regions. * - * Returns 1 if successful, otherwise returns 0. */ -int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) +int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) { unsigned long flags; u32 r; u32 n; - int ret; + int ret = 0; + struct qib_ibdev *dev = to_idev(mr->pd->device); + struct qib_lkey_table *rkt = &dev->lk_table; spin_lock_irqsave(&rkt->lock, flags); + /* special case for dma_mr lkey == 0 */ + if (dma_region) { + struct qib_mregion *tmr; + + tmr = rcu_access_pointer(dev->dma_mr); + if (!tmr) { + qib_get_mr(mr); + rcu_assign_pointer(dev->dma_mr, mr); + mr->lkey_published = 1; + } + goto success; + } + /* Find the next available LKEY */ r = rkt->next; n = r; @@ -57,11 +77,8 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) if (rkt->table[r] == NULL) break; r = (r + 1) & (rkt->max - 1); - if (r == n) { - spin_unlock_irqrestore(&rkt->lock, flags); - ret = 0; + if (r == n) goto bail; - } } rkt->next = (r + 1) & (rkt->max - 1); /* @@ -76,57 +93,58 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) mr->lkey |= 1 << 8; rkt->gen++; } - rkt->table[r] = mr; + qib_get_mr(mr); + rcu_assign_pointer(rkt->table[r], mr); + mr->lkey_published = 1; +success: spin_unlock_irqrestore(&rkt->lock, flags); - - ret = 1; - -bail: +out: return ret; +bail: + spin_unlock_irqrestore(&rkt->lock, flags); + ret = -ENOMEM; + goto out; } /** * qib_free_lkey - free an lkey - * @rkt: table from which to free the lkey - * @lkey: lkey id to free + * @mr: mr to free from tables */ -int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr) +void qib_free_lkey(struct qib_mregion *mr) { unsigned long flags; u32 lkey = mr->lkey; u32 r; - int ret; + struct qib_ibdev *dev = to_idev(mr->pd->device); + struct qib_lkey_table *rkt = &dev->lk_table; - spin_lock_irqsave(&dev->lk_table.lock, flags); - if (lkey == 0) { - if (dev->dma_mr && dev->dma_mr == mr) { - ret = atomic_read(&dev->dma_mr->refcount); - if (!ret) - dev->dma_mr = NULL; - } else - ret = 0; - } else { + spin_lock_irqsave(&rkt->lock, flags); + if (!mr->lkey_published) + goto out; + if (lkey == 0) + rcu_assign_pointer(dev->dma_mr, NULL); + else { r = lkey >> (32 - ib_qib_lkey_table_size); - ret = atomic_read(&dev->lk_table.table[r]->refcount); - if (!ret) - dev->lk_table.table[r] = NULL; + rcu_assign_pointer(rkt->table[r], NULL); } - spin_unlock_irqrestore(&dev->lk_table.lock, flags); - - if (ret) - ret = -EBUSY; - return ret; + qib_put_mr(mr); + mr->lkey_published = 0; +out: + spin_unlock_irqrestore(&rkt->lock, flags); } /** * qib_lkey_ok - check IB SGE for validity and initialize * @rkt: table containing lkey to check SGE against + * @pd: protection domain * @isge: outgoing internal SGE * @sge: SGE to check * @acc: access flags * * Return 1 if valid and successful, otherwise returns 0. * + * increments the reference count upon success + * * Check the IB SGE for validity and initialize our internal version * of it. */ @@ -136,23 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, struct qib_mregion *mr; unsigned n, m; size_t off; - int ret = 0; - unsigned long flags; /* * We use LKEY == zero for kernel virtual addresses * (see qib_get_dma_mr and qib_dma.c). */ - spin_lock_irqsave(&rkt->lock, flags); + rcu_read_lock(); if (sge->lkey == 0) { struct qib_ibdev *dev = to_idev(pd->ibpd.device); if (pd->user) goto bail; - if (!dev->dma_mr) + mr = rcu_dereference(dev->dma_mr); + if (!mr) + goto bail; + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) goto bail; - atomic_inc(&dev->dma_mr->refcount); - isge->mr = dev->dma_mr; + rcu_read_unlock(); + + isge->mr = mr; isge->vaddr = (void *) sge->addr; isge->length = sge->length; isge->sge_length = sge->length; @@ -160,9 +180,9 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, isge->n = 0; goto ok; } - mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != sge->lkey || - mr->pd != &pd->ibpd)) + mr = rcu_dereference( + rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]); + if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; off = sge->addr - mr->user_base; @@ -170,19 +190,35 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, off + sge->length > mr->length || (mr->access_flags & acc) != acc)) goto bail; + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); off += mr->offset; - m = 0; - n = 0; - while (off >= mr->map[m]->segs[n].length) { - off -= mr->map[m]->segs[n].length; - n++; - if (n >= QIB_SEGSZ) { - m++; - n = 0; + if (mr->page_shift) { + /* + page sizes are uniform power of 2 so no loop is necessary + entries_spanned_by_off is the number of times the loop below + would have executed. + */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off/QIB_SEGSZ; + n = entries_spanned_by_off%QIB_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= QIB_SEGSZ) { + m++; + n = 0; + } } } - atomic_inc(&mr->refcount); isge->mr = mr; isge->vaddr = mr->map[m]->segs[n].vaddr + off; isge->length = mr->map[m]->segs[n].length - off; @@ -190,22 +226,24 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, isge->m = m; isge->n = n; ok: - ret = 1; + return 1; bail: - spin_unlock_irqrestore(&rkt->lock, flags); - return ret; + rcu_read_unlock(); + return 0; } /** * qib_rkey_ok - check the IB virtual address, length, and RKEY - * @dev: infiniband device - * @ss: SGE state + * @qp: qp for validation + * @sge: SGE state * @len: length of data * @vaddr: virtual address to place data * @rkey: rkey to check * @acc: access flags * * Return 1 if successful, otherwise 0. + * + * increments the reference count upon success */ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc) @@ -214,24 +252,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, struct qib_mregion *mr; unsigned n, m; size_t off; - int ret = 0; - unsigned long flags; /* * We use RKEY == zero for kernel virtual addresses * (see qib_get_dma_mr and qib_dma.c). */ - spin_lock_irqsave(&rkt->lock, flags); + rcu_read_lock(); if (rkey == 0) { struct qib_pd *pd = to_ipd(qp->ibqp.pd); struct qib_ibdev *dev = to_idev(pd->ibpd.device); if (pd->user) goto bail; - if (!dev->dma_mr) + mr = rcu_dereference(dev->dma_mr); + if (!mr) + goto bail; + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) goto bail; - atomic_inc(&dev->dma_mr->refcount); - sge->mr = dev->dma_mr; + rcu_read_unlock(); + + sge->mr = mr; sge->vaddr = (void *) vaddr; sge->length = len; sge->sge_length = len; @@ -240,27 +280,44 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, goto ok; } - mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + mr = rcu_dereference( + rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]); + if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; off = vaddr - mr->iova; if (unlikely(vaddr < mr->iova || off + len > mr->length || (mr->access_flags & acc) == 0)) goto bail; + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); off += mr->offset; - m = 0; - n = 0; - while (off >= mr->map[m]->segs[n].length) { - off -= mr->map[m]->segs[n].length; - n++; - if (n >= QIB_SEGSZ) { - m++; - n = 0; + if (mr->page_shift) { + /* + page sizes are uniform power of 2 so no loop is necessary + entries_spanned_by_off is the number of times the loop below + would have executed. + */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off/QIB_SEGSZ; + n = entries_spanned_by_off%QIB_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= QIB_SEGSZ) { + m++; + n = 0; + } } } - atomic_inc(&mr->refcount); sge->mr = mr; sge->vaddr = mr->map[m]->segs[n].vaddr + off; sge->length = mr->map[m]->segs[n].length - off; @@ -268,10 +325,10 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, sge->m = m; sge->n = n; ok: - ret = 1; + return 1; bail: - spin_unlock_irqrestore(&rkt->lock, flags); - return ret; + rcu_read_unlock(); + return 0; } /* @@ -293,7 +350,9 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) if (pd->user || rkey == 0) goto bail; - mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; + mr = rcu_dereference_protected( + rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))], + lockdep_is_held(&rkt->lock)); if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) goto bail; diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 94b0d1f3a8f..22c720e5740 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -49,6 +49,18 @@ static int reply(struct ib_smp *smp) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } +static int reply_failure(struct ib_smp *smp) +{ + /* + * The verbs framework will handle the directed/LID route + * packet changes. + */ + smp->method = IB_MGMT_METHOD_GET_RESP; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + smp->status |= IB_SMP_DIRECTION; + return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY; +} + static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) { struct ib_mad_send_buf *send_buf; @@ -90,14 +102,10 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) if (!ibp->sm_ah) { if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { struct ib_ah *ah; - struct ib_ah_attr attr; - memset(&attr, 0, sizeof attr); - attr.dlid = ibp->sm_lid; - attr.port_num = ppd_from_ibp(ibp)->port; - ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr); + ah = qib_create_qp0_ah(ibp, ibp->sm_lid); if (IS_ERR(ah)) - ret = -EINVAL; + ret = PTR_ERR(ah); else { send_buf->ah = ah; ibp->sm_ah = to_iah(ah); @@ -396,6 +404,7 @@ static int get_linkdowndefaultstate(struct qib_pportdata *ppd) static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags) { + int valid_mkey = 0; int ret = 0; /* Is the mkey in the process of expiring? */ @@ -406,23 +415,36 @@ static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags) ibp->mkeyprot = 0; } - /* M_Key checking depends on Portinfo:M_Key_protect_bits */ - if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && ibp->mkey != 0 && - ibp->mkey != smp->mkey && - (smp->method == IB_MGMT_METHOD_SET || - smp->method == IB_MGMT_METHOD_TRAP_REPRESS || - (smp->method == IB_MGMT_METHOD_GET && ibp->mkeyprot >= 2))) { - if (ibp->mkey_violations != 0xFFFF) - ++ibp->mkey_violations; - if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period) - ibp->mkey_lease_timeout = jiffies + - ibp->mkey_lease_period * HZ; - /* Generate a trap notice. */ - qib_bad_mkey(ibp, smp); - ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - } else if (ibp->mkey_lease_timeout) + if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 || + ibp->mkey == smp->mkey) + valid_mkey = 1; + + /* Unset lease timeout on any valid Get/Set/TrapRepress */ + if (valid_mkey && ibp->mkey_lease_timeout && + (smp->method == IB_MGMT_METHOD_GET || + smp->method == IB_MGMT_METHOD_SET || + smp->method == IB_MGMT_METHOD_TRAP_REPRESS)) ibp->mkey_lease_timeout = 0; + if (!valid_mkey) { + switch (smp->method) { + case IB_MGMT_METHOD_GET: + /* Bad mkey not a violation below level 2 */ + if (ibp->mkeyprot < 2) + break; + case IB_MGMT_METHOD_SET: + case IB_MGMT_METHOD_TRAP_REPRESS: + if (ibp->mkey_violations != 0xFFFF) + ++ibp->mkey_violations; + if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period) + ibp->mkey_lease_timeout = jiffies + + ibp->mkey_lease_period * HZ; + /* Generate a trap notice. */ + qib_bad_mkey(ibp, smp); + ret = 1; + } + } + return ret; } @@ -433,7 +455,6 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, struct qib_pportdata *ppd; struct qib_ibport *ibp; struct ib_port_info *pip = (struct ib_port_info *)smp->data; - u16 lid; u8 mtu; int ret; u32 state; @@ -450,8 +471,10 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, if (port_num != port) { ibp = to_iport(ibdev, port_num); ret = check_mkey(ibp, smp, 0); - if (ret) + if (ret) { + ret = IB_MAD_RESULT_FAILURE; goto bail; + } } } @@ -464,12 +487,12 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, memset(smp->data, 0, sizeof(smp->data)); /* Only return the mkey if the protection field allows it. */ - if (smp->method == IB_MGMT_METHOD_SET || ibp->mkey == smp->mkey || - ibp->mkeyprot == 0) + if (!(smp->method == IB_MGMT_METHOD_GET && + ibp->mkey != smp->mkey && + ibp->mkeyprot == 1)) pip->mkey = ibp->mkey; pip->gid_prefix = ibp->gid_prefix; - lid = ppd->lid; - pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE; + pip->lid = cpu_to_be16(ppd->lid); pip->sm_lid = cpu_to_be16(ibp->sm_lid); pip->cap_mask = cpu_to_be32(ibp->port_cap_flags); /* pip->diag_code; */ @@ -632,7 +655,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, struct qib_devdata *dd; struct qib_pportdata *ppd; struct qib_ibport *ibp; - char clientrereg = 0; + u8 clientrereg = (pip->clientrereg_resv_subnetto & 0x80); unsigned long flags; u16 lid, smlid; u8 lwe; @@ -668,8 +691,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, lid = be16_to_cpu(pip->lid); /* Must be a valid unicast LID address. */ if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE) - goto err; - if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) { + smp->status |= IB_SMP_INVALID_FIELD; + else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) { if (ppd->lid != lid) qib_set_uevent_bits(ppd, _QIB_EVENT_LID_CHANGE_BIT); if (ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) @@ -683,8 +706,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, msl = pip->neighbormtu_mastersmsl & 0xF; /* Must be a valid unicast LID address. */ if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE) - goto err; - if (smlid != ibp->sm_lid || msl != ibp->sm_sl) { + smp->status |= IB_SMP_INVALID_FIELD; + else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) { spin_lock_irqsave(&ibp->lock, flags); if (ibp->sm_ah) { if (smlid != ibp->sm_lid) @@ -705,10 +728,11 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, lwe = pip->link_width_enabled; if (lwe) { if (lwe == 0xFF) - lwe = ppd->link_width_supported; + set_link_width_enabled(ppd, ppd->link_width_supported); else if (lwe >= 16 || (lwe & ~ppd->link_width_supported)) - goto err; - set_link_width_enabled(ppd, lwe); + smp->status |= IB_SMP_INVALID_FIELD; + else if (lwe != ppd->link_width_enabled) + set_link_width_enabled(ppd, lwe); } lse = pip->linkspeedactive_enabled & 0xF; @@ -719,10 +743,12 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, * speeds. */ if (lse == 15) - lse = ppd->link_speed_supported; + set_link_speed_enabled(ppd, + ppd->link_speed_supported); else if (lse >= 8 || (lse & ~ppd->link_speed_supported)) - goto err; - set_link_speed_enabled(ppd, lse); + smp->status |= IB_SMP_INVALID_FIELD; + else if (lse != ppd->link_speed_enabled) + set_link_speed_enabled(ppd, lse); } /* Set link down default state. */ @@ -738,7 +764,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, IB_LINKINITCMD_POLL); break; default: - goto err; + smp->status |= IB_SMP_INVALID_FIELD; } ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6; @@ -748,15 +774,17 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF); if (mtu == -1) - goto err; - qib_set_mtu(ppd, mtu); + smp->status |= IB_SMP_INVALID_FIELD; + else + qib_set_mtu(ppd, mtu); /* Set operational VLs */ vls = (pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF; if (vls) { if (vls > ppd->vls_supported) - goto err; - (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OP_VLS, vls); + smp->status |= IB_SMP_INVALID_FIELD; + else + (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OP_VLS, vls); } if (pip->mkey_violations == 0) @@ -770,19 +798,13 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, ore = pip->localphyerrors_overrunerrors; if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF)) - goto err; + smp->status |= IB_SMP_INVALID_FIELD; if (set_overrunthreshold(ppd, (ore & 0xF))) - goto err; + smp->status |= IB_SMP_INVALID_FIELD; ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; - if (pip->clientrereg_resv_subnetto & 0x80) { - clientrereg = 1; - event.event = IB_EVENT_CLIENT_REREGISTER; - ib_dispatch_event(&event); - } - /* * Do the port state change now that the other link parameters * have been set. @@ -792,7 +814,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, state = pip->linkspeed_portstate & 0xF; lstate = (pip->portphysstate_linkdown >> 4) & 0xF; if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) - goto err; + smp->status |= IB_SMP_INVALID_FIELD; /* * Only state changes of DOWN, ARM, and ACTIVE are valid @@ -812,8 +834,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, lstate = QIB_IB_LINKDOWN; else if (lstate == 3) lstate = QIB_IB_LINKDOWN_DISABLE; - else - goto err; + else { + smp->status |= IB_SMP_INVALID_FIELD; + break; + } spin_lock_irqsave(&ppd->lflags_lock, flags); ppd->lflags &= ~QIBL_LINKV; spin_unlock_irqrestore(&ppd->lflags_lock, flags); @@ -835,16 +859,20 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, qib_set_linkstate(ppd, QIB_IB_LINKACTIVE); break; default: - /* XXX We have already partially updated our state! */ - goto err; + smp->status |= IB_SMP_INVALID_FIELD; + } + + if (clientrereg) { + event.event = IB_EVENT_CLIENT_REREGISTER; + ib_dispatch_event(&event); } ret = subn_get_portinfo(smp, ibdev, port); - if (clientrereg) - pip->clientrereg_resv_subnetto |= 0x80; + /* restore re-reg bit per o14-12.2.1 */ + pip->clientrereg_resv_subnetto |= clientrereg; - goto done; + goto get_only; err: smp->status |= IB_SMP_INVALID_FIELD; @@ -1000,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys) event.event = IB_EVENT_PKEY_CHANGE; event.device = &dd->verbs_dev.ibdev; - event.element.port_num = 1; + event.element.port_num = port; ib_dispatch_event(&event); } return 0; @@ -1118,22 +1146,22 @@ static int subn_trap_repress(struct ib_smp *smp, struct ib_device *ibdev, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; } -static int pma_get_classportinfo(struct ib_perf *pmp, +static int pma_get_classportinfo(struct ib_pma_mad *pmp, struct ib_device *ibdev) { - struct ib_pma_classportinfo *p = - (struct ib_pma_classportinfo *)pmp->data; + struct ib_class_port_info *p = + (struct ib_class_port_info *)pmp->data; struct qib_devdata *dd = dd_from_ibdev(ibdev); memset(pmp->data, 0, sizeof(pmp->data)); - if (pmp->attr_mod != 0) - pmp->status |= IB_SMP_INVALID_FIELD; + if (pmp->mad_hdr.attr_mod != 0) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; /* Note that AllPortSelect is not valid */ p->base_version = 1; p->class_version = 1; - p->cap_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; + p->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; /* * Set the most significant bit of CM2 to indicate support for * congestion statistics @@ -1147,7 +1175,7 @@ static int pma_get_classportinfo(struct ib_perf *pmp, return reply((struct ib_smp *) pmp); } -static int pma_get_portsamplescontrol(struct ib_perf *pmp, +static int pma_get_portsamplescontrol(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portsamplescontrol *p = @@ -1162,8 +1190,8 @@ static int pma_get_portsamplescontrol(struct ib_perf *pmp, memset(pmp->data, 0, sizeof(pmp->data)); p->port_select = port_select; - if (pmp->attr_mod != 0 || port_select != port) { - pmp->status |= IB_SMP_INVALID_FIELD; + if (pmp->mad_hdr.attr_mod != 0 || port_select != port) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; goto bail; } spin_lock_irqsave(&ibp->lock, flags); @@ -1185,7 +1213,7 @@ bail: return reply((struct ib_smp *) pmp); } -static int pma_set_portsamplescontrol(struct ib_perf *pmp, +static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portsamplescontrol *p = @@ -1198,8 +1226,8 @@ static int pma_set_portsamplescontrol(struct ib_perf *pmp, u8 status, xmit_flags; int ret; - if (pmp->attr_mod != 0 || p->port_select != port) { - pmp->status |= IB_SMP_INVALID_FIELD; + if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; ret = reply((struct ib_smp *) pmp); goto bail; } @@ -1314,7 +1342,7 @@ static u64 get_cache_hw_sample_counters(struct qib_pportdata *ppd, return ret; } -static int pma_get_portsamplesresult(struct ib_perf *pmp, +static int pma_get_portsamplesresult(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portsamplesresult *p = @@ -1353,7 +1381,7 @@ static int pma_get_portsamplesresult(struct ib_perf *pmp, return reply((struct ib_smp *) pmp); } -static int pma_get_portsamplesresult_ext(struct ib_perf *pmp, +static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portsamplesresult_ext *p = @@ -1395,7 +1423,7 @@ static int pma_get_portsamplesresult_ext(struct ib_perf *pmp, return reply((struct ib_smp *) pmp); } -static int pma_get_portcounters(struct ib_perf *pmp, +static int pma_get_portcounters(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) @@ -1429,8 +1457,8 @@ static int pma_get_portcounters(struct ib_perf *pmp, memset(pmp->data, 0, sizeof(pmp->data)); p->port_select = port_select; - if (pmp->attr_mod != 0 || port_select != port) - pmp->status |= IB_SMP_INVALID_FIELD; + if (pmp->mad_hdr.attr_mod != 0 || port_select != port) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; if (cntrs.symbol_error_counter > 0xFFFFUL) p->symbol_error_counter = cpu_to_be16(0xFFFF); @@ -1465,7 +1493,7 @@ static int pma_get_portcounters(struct ib_perf *pmp, cntrs.local_link_integrity_errors = 0xFUL; if (cntrs.excessive_buffer_overrun_errors > 0xFUL) cntrs.excessive_buffer_overrun_errors = 0xFUL; - p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) | + p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) | cntrs.excessive_buffer_overrun_errors; if (cntrs.vl15_dropped > 0xFFFFUL) p->vl15_dropped = cpu_to_be16(0xFFFF); @@ -1493,7 +1521,7 @@ static int pma_get_portcounters(struct ib_perf *pmp, return reply((struct ib_smp *) pmp); } -static int pma_get_portcounters_cong(struct ib_perf *pmp, +static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { /* Congestion PMA packets start at offset 24 not 64 */ @@ -1503,7 +1531,7 @@ static int pma_get_portcounters_cong(struct ib_perf *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); struct qib_devdata *dd = dd_from_ppd(ppd); - u32 port_select = be32_to_cpu(pmp->attr_mod) & 0xFF; + u32 port_select = be32_to_cpu(pmp->mad_hdr.attr_mod) & 0xFF; u64 xmit_wait_counter; unsigned long flags; @@ -1512,9 +1540,9 @@ static int pma_get_portcounters_cong(struct ib_perf *pmp, * SET method ends up calling this anyway. */ if (!dd->psxmitwait_supported) - pmp->status |= IB_SMP_UNSUP_METH_ATTR; + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; if (port_select != port) - pmp->status |= IB_SMP_INVALID_FIELD; + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; qib_get_counters(ppd, &cntrs); spin_lock_irqsave(&ppd->ibport_data.lock, flags); @@ -1596,7 +1624,7 @@ static int pma_get_portcounters_cong(struct ib_perf *pmp, cntrs.local_link_integrity_errors = 0xFUL; if (cntrs.excessive_buffer_overrun_errors > 0xFUL) cntrs.excessive_buffer_overrun_errors = 0xFUL; - p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) | + p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) | cntrs.excessive_buffer_overrun_errors; if (cntrs.vl15_dropped > 0xFFFFUL) p->vl15_dropped = cpu_to_be16(0xFFFF); @@ -1606,7 +1634,24 @@ static int pma_get_portcounters_cong(struct ib_perf *pmp, return reply((struct ib_smp *)pmp); } -static int pma_get_portcounters_ext(struct ib_perf *pmp, +static void qib_snapshot_pmacounters( + struct qib_ibport *ibp, + struct qib_pma_counters *pmacounters) +{ + struct qib_pma_counters *p; + int cpu; + + memset(pmacounters, 0, sizeof(*pmacounters)); + for_each_possible_cpu(cpu) { + p = per_cpu_ptr(ibp->pmastats, cpu); + pmacounters->n_unicast_xmit += p->n_unicast_xmit; + pmacounters->n_unicast_rcv += p->n_unicast_rcv; + pmacounters->n_multicast_xmit += p->n_multicast_xmit; + pmacounters->n_multicast_rcv += p->n_multicast_rcv; + } +} + +static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portcounters_ext *p = @@ -1614,13 +1659,14 @@ static int pma_get_portcounters_ext(struct ib_perf *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; u8 port_select = p->port_select; memset(pmp->data, 0, sizeof(pmp->data)); p->port_select = port_select; - if (pmp->attr_mod != 0 || port_select != port) { - pmp->status |= IB_SMP_INVALID_FIELD; + if (pmp->mad_hdr.attr_mod != 0 || port_select != port) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; goto bail; } @@ -1636,16 +1682,23 @@ static int pma_get_portcounters_ext(struct ib_perf *pmp, p->port_rcv_data = cpu_to_be64(rwords); p->port_xmit_packets = cpu_to_be64(spkts); p->port_rcv_packets = cpu_to_be64(rpkts); - p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit); - p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv); - p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit); - p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv); + + qib_snapshot_pmacounters(ibp, &pma); + + p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit + - ibp->z_unicast_xmit); + p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv + - ibp->z_unicast_rcv); + p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit + - ibp->z_multicast_xmit); + p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv + - ibp->z_multicast_rcv); bail: return reply((struct ib_smp *) pmp); } -static int pma_set_portcounters(struct ib_perf *pmp, +static int pma_set_portcounters(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) @@ -1708,14 +1761,14 @@ static int pma_set_portcounters(struct ib_perf *pmp, return pma_get_portcounters(pmp, ibdev, port); } -static int pma_set_portcounters_cong(struct ib_perf *pmp, +static int pma_set_portcounters_cong(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); struct qib_devdata *dd = dd_from_ppd(ppd); struct qib_verbs_counters cntrs; - u32 counter_select = (be32_to_cpu(pmp->attr_mod) >> 24) & 0xFF; + u32 counter_select = (be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24) & 0xFF; int ret = 0; unsigned long flags; @@ -1759,7 +1812,7 @@ static int pma_set_portcounters_cong(struct ib_perf *pmp, return ret; } -static int pma_set_portcounters_ext(struct ib_perf *pmp, +static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) @@ -1767,6 +1820,7 @@ static int pma_set_portcounters_ext(struct ib_perf *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait); @@ -1782,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_perf *pmp, if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) ibp->z_port_rcv_packets = rpkts; + qib_snapshot_pmacounters(ibp, &pma); + if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) - ibp->n_unicast_xmit = 0; + ibp->z_unicast_xmit = pma.n_unicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) - ibp->n_unicast_rcv = 0; + ibp->z_unicast_rcv = pma.n_unicast_rcv; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) - ibp->n_multicast_xmit = 0; + ibp->z_multicast_xmit = pma.n_multicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) - ibp->n_multicast_rcv = 0; + ibp->z_multicast_rcv = pma.n_multicast_rcv; return pma_get_portcounters_ext(pmp, ibdev, port); } @@ -1830,6 +1886,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, port_num && port_num <= ibdev->phys_port_cnt && port != port_num) (void) check_mkey(to_iport(ibdev, port_num), smp, 0); + ret = IB_MAD_RESULT_FAILURE; goto bail; } @@ -1952,19 +2009,19 @@ static int process_perf(struct ib_device *ibdev, u8 port, struct ib_mad *in_mad, struct ib_mad *out_mad) { - struct ib_perf *pmp = (struct ib_perf *)out_mad; + struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; int ret; *out_mad = *in_mad; - if (pmp->class_version != 1) { - pmp->status |= IB_SMP_UNSUP_VERSION; + if (pmp->mad_hdr.class_version != 1) { + pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION; ret = reply((struct ib_smp *) pmp); goto bail; } - switch (pmp->method) { + switch (pmp->mad_hdr.method) { case IB_MGMT_METHOD_GET: - switch (pmp->attr_id) { + switch (pmp->mad_hdr.attr_id) { case IB_PMA_CLASS_PORT_INFO: ret = pma_get_classportinfo(pmp, ibdev); goto bail; @@ -1987,13 +2044,13 @@ static int process_perf(struct ib_device *ibdev, u8 port, ret = pma_get_portcounters_cong(pmp, ibdev, port); goto bail; default: - pmp->status |= IB_SMP_UNSUP_METH_ATTR; + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_smp *) pmp); goto bail; } case IB_MGMT_METHOD_SET: - switch (pmp->attr_id) { + switch (pmp->mad_hdr.attr_id) { case IB_PMA_PORT_SAMPLES_CONTROL: ret = pma_set_portsamplescontrol(pmp, ibdev, port); goto bail; @@ -2007,7 +2064,7 @@ static int process_perf(struct ib_device *ibdev, u8 port, ret = pma_set_portcounters_cong(pmp, ibdev, port); goto bail; default: - pmp->status |= IB_SMP_UNSUP_METH_ATTR; + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_smp *) pmp); goto bail; } @@ -2023,7 +2080,7 @@ static int process_perf(struct ib_device *ibdev, u8 port, goto bail; default: - pmp->status |= IB_SMP_UNSUP_METHOD; + pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD; ret = reply((struct ib_smp *) pmp); } @@ -2031,6 +2088,298 @@ bail: return ret; } +static int cc_get_classportinfo(struct ib_cc_mad *ccp, + struct ib_device *ibdev) +{ + struct ib_cc_classportinfo_attr *p = + (struct ib_cc_classportinfo_attr *)ccp->mgmt_data; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + p->base_version = 1; + p->class_version = 1; + p->cap_mask = 0; + + /* + * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. + */ + p->resp_time_value = 18; + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_info(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_info_attr *p = + (struct ib_cc_info_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + p->congestion_info = 0; + p->control_table_cap = ppd->cc_max_table_entries; + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_setting(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + int i; + struct ib_cc_congestion_setting_attr *p = + (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct ib_cc_congestion_entry_shadow *entries; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + spin_lock(&ppd->cc_shadow_lock); + + entries = ppd->congestion_entries_shadow->entries; + p->port_control = cpu_to_be16( + ppd->congestion_entries_shadow->port_control); + p->control_map = cpu_to_be16( + ppd->congestion_entries_shadow->control_map); + for (i = 0; i < IB_CC_CCS_ENTRIES; i++) { + p->entries[i].ccti_increase = entries[i].ccti_increase; + p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer); + p->entries[i].trigger_threshold = entries[i].trigger_threshold; + p->entries[i].ccti_min = entries[i].ccti_min; + } + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_control_table(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_table_attr *p = + (struct ib_cc_table_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u32 cct_block_index = be32_to_cpu(ccp->attr_mod); + u32 max_cct_block; + u32 cct_entry; + struct ib_cc_table_entry_shadow *entries; + int i; + + /* Is the table index more than what is supported? */ + if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) + goto bail; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + spin_lock(&ppd->cc_shadow_lock); + + max_cct_block = + (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES; + max_cct_block = max_cct_block ? max_cct_block - 1 : 0; + + if (cct_block_index > max_cct_block) { + spin_unlock(&ppd->cc_shadow_lock); + goto bail; + } + + ccp->attr_mod = cpu_to_be32(cct_block_index); + + cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1); + + cct_entry--; + + p->ccti_limit = cpu_to_be16(cct_entry); + + entries = &ppd->ccti_entries_shadow-> + entries[IB_CCT_ENTRIES * cct_block_index]; + cct_entry %= IB_CCT_ENTRIES; + + for (i = 0; i <= cct_entry; i++) + p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry); + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); + +bail: + return reply_failure((struct ib_smp *) ccp); +} + +static int cc_set_congestion_setting(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_congestion_setting_attr *p = + (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + int i; + + ppd->cc_sl_control_map = be16_to_cpu(p->control_map); + + for (i = 0; i < IB_CC_CCS_ENTRIES; i++) { + ppd->congestion_entries[i].ccti_increase = + p->entries[i].ccti_increase; + + ppd->congestion_entries[i].ccti_timer = + be16_to_cpu(p->entries[i].ccti_timer); + + ppd->congestion_entries[i].trigger_threshold = + p->entries[i].trigger_threshold; + + ppd->congestion_entries[i].ccti_min = + p->entries[i].ccti_min; + } + + return reply((struct ib_smp *) ccp); +} + +static int cc_set_congestion_control_table(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_table_attr *p = + (struct ib_cc_table_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u32 cct_block_index = be32_to_cpu(ccp->attr_mod); + u32 cct_entry; + struct ib_cc_table_entry_shadow *entries; + int i; + + /* Is the table index more than what is supported? */ + if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) + goto bail; + + /* If this packet is the first in the sequence then + * zero the total table entry count. + */ + if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES) + ppd->total_cct_entry = 0; + + cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES; + + /* ccti_limit is 0 to 63 */ + ppd->total_cct_entry += (cct_entry + 1); + + if (ppd->total_cct_entry > ppd->cc_supported_table_entries) + goto bail; + + ppd->ccti_limit = be16_to_cpu(p->ccti_limit); + + entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index); + + for (i = 0; i <= cct_entry; i++) + entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry); + + spin_lock(&ppd->cc_shadow_lock); + + ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1; + memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries, + (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry))); + + ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED; + ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map; + memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries, + IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry)); + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); + +bail: + return reply_failure((struct ib_smp *) ccp); +} + +static int check_cc_key(struct qib_ibport *ibp, + struct ib_cc_mad *ccp, int mad_flags) +{ + return 0; +} + +static int process_cc(struct ib_device *ibdev, int mad_flags, + u8 port, struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; + struct qib_ibport *ibp = to_iport(ibdev, port); + int ret; + + *out_mad = *in_mad; + + if (ccp->class_version != 2) { + ccp->status |= IB_SMP_UNSUP_VERSION; + ret = reply((struct ib_smp *)ccp); + goto bail; + } + + ret = check_cc_key(ibp, ccp, mad_flags); + if (ret) + goto bail; + + switch (ccp->method) { + case IB_MGMT_METHOD_GET: + switch (ccp->attr_id) { + case IB_CC_ATTR_CLASSPORTINFO: + ret = cc_get_classportinfo(ccp, ibdev); + goto bail; + + case IB_CC_ATTR_CONGESTION_INFO: + ret = cc_get_congestion_info(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CA_CONGESTION_SETTING: + ret = cc_get_congestion_setting(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: + ret = cc_get_congestion_control_table(ccp, ibdev, port); + goto bail; + + /* FALLTHROUGH */ + default: + ccp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) ccp); + goto bail; + } + + case IB_MGMT_METHOD_SET: + switch (ccp->attr_id) { + case IB_CC_ATTR_CA_CONGESTION_SETTING: + ret = cc_set_congestion_setting(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: + ret = cc_set_congestion_control_table(ccp, ibdev, port); + goto bail; + + /* FALLTHROUGH */ + default: + ccp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) ccp); + goto bail; + } + + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. + */ + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + + case IB_MGMT_METHOD_TRAP: + default: + ccp->status |= IB_SMP_UNSUP_METHOD; + ret = reply((struct ib_smp *) ccp); + } + +bail: + return ret; +} + /** * qib_process_mad - process an incoming MAD packet * @ibdev: the infiniband device this packet came in on @@ -2055,6 +2404,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, struct ib_mad *in_mad, struct ib_mad *out_mad) { int ret; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: @@ -2066,6 +2417,15 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, ret = process_perf(ibdev, port, in_mad, out_mad); goto bail; + case IB_MGMT_CLASS_CONG_MGMT: + if (!ppd->congestion_entries_shadow || + !qib_cc_table_size) { + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + } + ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad); + goto bail; + default: ret = IB_MAD_RESULT_SUCCESS; } diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h index 147aff9117d..941d4d50d8e 100644 --- a/drivers/infiniband/hw/qib/qib_mad.h +++ b/drivers/infiniband/hw/qib/qib_mad.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -31,6 +31,10 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#ifndef _QIB_MAD_H +#define _QIB_MAD_H + +#include <rdma/ib_pma.h> #define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004) #define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008) @@ -50,7 +54,7 @@ struct ib_node_info { __be32 revision; u8 local_port_num; u8 vendor_id[3]; -} __attribute__ ((packed)); +} __packed; struct ib_mad_notice_attr { u8 generic_type; @@ -69,24 +73,24 @@ struct ib_mad_notice_attr { __be16 reserved; __be16 lid; /* where violation happened */ u8 port_num; /* where violation happened */ - } __attribute__ ((packed)) ntc_129_131; + } __packed ntc_129_131; struct { __be16 reserved; - __be16 lid; /* LID where change occured */ + __be16 lid; /* LID where change occurred */ u8 reserved2; u8 local_changes; /* low bit - local changes */ __be32 new_cap_mask; /* new capability mask */ u8 reserved3; u8 change_flags; /* low 3 bits only */ - } __attribute__ ((packed)) ntc_144; + } __packed ntc_144; struct { __be16 reserved; __be16 lid; /* lid where sys guid changed */ __be16 reserved2; __be64 new_sys_guid; - } __attribute__ ((packed)) ntc_145; + } __packed ntc_145; struct { __be16 reserved; @@ -100,7 +104,7 @@ struct ib_mad_notice_attr { u8 reserved3; u8 dr_trunc_hop; u8 dr_rtn_path[30]; - } __attribute__ ((packed)) ntc_256; + } __packed ntc_256; struct { __be16 reserved; @@ -111,7 +115,7 @@ struct ib_mad_notice_attr { __be32 qp2; /* high 8 bits reserved */ union ib_gid gid1; union ib_gid gid2; - } __attribute__ ((packed)) ntc_257_258; + } __packed ntc_257_258; } details; }; @@ -180,109 +184,8 @@ struct ib_vl_weight_elem { #define IB_VLARB_HIGHPRI_0_31 3 #define IB_VLARB_HIGHPRI_32_63 4 -/* - * PMA class portinfo capability mask bits - */ -#define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8) -#define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9) -#define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12) - -#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) -#define IB_PMA_PORT_SAMPLES_CONTROL cpu_to_be16(0x0010) -#define IB_PMA_PORT_SAMPLES_RESULT cpu_to_be16(0x0011) -#define IB_PMA_PORT_COUNTERS cpu_to_be16(0x0012) -#define IB_PMA_PORT_COUNTERS_EXT cpu_to_be16(0x001D) -#define IB_PMA_PORT_SAMPLES_RESULT_EXT cpu_to_be16(0x001E) #define IB_PMA_PORT_COUNTERS_CONG cpu_to_be16(0xFF00) -struct ib_perf { - u8 base_version; - u8 mgmt_class; - u8 class_version; - u8 method; - __be16 status; - __be16 unused; - __be64 tid; - __be16 attr_id; - __be16 resv; - __be32 attr_mod; - u8 reserved[40]; - u8 data[192]; -} __attribute__ ((packed)); - -struct ib_pma_classportinfo { - u8 base_version; - u8 class_version; - __be16 cap_mask; - u8 reserved[3]; - u8 resp_time_value; /* only lower 5 bits */ - union ib_gid redirect_gid; - __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */ - __be16 redirect_lid; - __be16 redirect_pkey; - __be32 redirect_qp; /* only lower 24 bits */ - __be32 redirect_qkey; - union ib_gid trap_gid; - __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */ - __be16 trap_lid; - __be16 trap_pkey; - __be32 trap_hl_qp; /* 8, 24 bits respectively */ - __be32 trap_qkey; -} __attribute__ ((packed)); - -struct ib_pma_portsamplescontrol { - u8 opcode; - u8 port_select; - u8 tick; - u8 counter_width; /* only lower 3 bits */ - __be32 counter_mask0_9; /* 2, 10 * 3, bits */ - __be16 counter_mask10_14; /* 1, 5 * 3, bits */ - u8 sample_mechanisms; - u8 sample_status; /* only lower 2 bits */ - __be64 option_mask; - __be64 vendor_mask; - __be32 sample_start; - __be32 sample_interval; - __be16 tag; - __be16 counter_select[15]; -} __attribute__ ((packed)); - -struct ib_pma_portsamplesresult { - __be16 tag; - __be16 sample_status; /* only lower 2 bits */ - __be32 counter[15]; -} __attribute__ ((packed)); - -struct ib_pma_portsamplesresult_ext { - __be16 tag; - __be16 sample_status; /* only lower 2 bits */ - __be32 extended_width; /* only upper 2 bits */ - __be64 counter[15]; -} __attribute__ ((packed)); - -struct ib_pma_portcounters { - u8 reserved; - u8 port_select; - __be16 counter_select; - __be16 symbol_error_counter; - u8 link_error_recovery_counter; - u8 link_downed_counter; - __be16 port_rcv_errors; - __be16 port_rcv_remphys_errors; - __be16 port_rcv_switch_relay_errors; - __be16 port_xmit_discards; - u8 port_xmit_constraint_errors; - u8 port_rcv_constraint_errors; - u8 reserved1; - u8 lli_ebor_errors; /* 4, 4, bits */ - __be16 reserved2; - __be16 vl15_dropped; - __be32 port_xmit_data; - __be32 port_rcv_data; - __be32 port_xmit_packets; - __be32 port_rcv_packets; -} __attribute__ ((packed)); - struct ib_pma_portcounters_cong { u8 reserved; u8 reserved1; @@ -297,7 +200,7 @@ struct ib_pma_portcounters_cong { u8 port_xmit_constraint_errors; u8 port_rcv_constraint_errors; u8 reserved2; - u8 lli_ebor_errors; /* 4, 4, bits */ + u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */ __be16 reserved3; __be16 vl15_dropped; __be64 port_xmit_data; @@ -306,7 +209,7 @@ struct ib_pma_portcounters_cong { __be64 port_rcv_packets; __be64 port_xmit_wait; __be64 port_adr_events; -} __attribute__ ((packed)); +} __packed; #define IB_PMA_CONG_HW_CONTROL_TIMER 0x00 #define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01 @@ -316,48 +219,201 @@ struct ib_pma_portcounters_cong { /* number of 4nsec cycles equaling 2secs */ #define QIB_CONG_TIMER_PSINTERVAL 0x1DCD64EC -#define IB_PMA_SEL_SYMBOL_ERROR cpu_to_be16(0x0001) -#define IB_PMA_SEL_LINK_ERROR_RECOVERY cpu_to_be16(0x0002) -#define IB_PMA_SEL_LINK_DOWNED cpu_to_be16(0x0004) -#define IB_PMA_SEL_PORT_RCV_ERRORS cpu_to_be16(0x0008) -#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS cpu_to_be16(0x0010) -#define IB_PMA_SEL_PORT_XMIT_DISCARDS cpu_to_be16(0x0040) -#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS cpu_to_be16(0x0200) -#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS cpu_to_be16(0x0400) -#define IB_PMA_SEL_PORT_VL15_DROPPED cpu_to_be16(0x0800) -#define IB_PMA_SEL_PORT_XMIT_DATA cpu_to_be16(0x1000) -#define IB_PMA_SEL_PORT_RCV_DATA cpu_to_be16(0x2000) -#define IB_PMA_SEL_PORT_XMIT_PACKETS cpu_to_be16(0x4000) -#define IB_PMA_SEL_PORT_RCV_PACKETS cpu_to_be16(0x8000) - #define IB_PMA_SEL_CONG_ALL 0x01 #define IB_PMA_SEL_CONG_PORT_DATA 0x02 #define IB_PMA_SEL_CONG_XMIT 0x04 #define IB_PMA_SEL_CONG_ROUTING 0x08 -struct ib_pma_portcounters_ext { - u8 reserved; - u8 port_select; - __be16 counter_select; - __be32 reserved1; - __be64 port_xmit_data; - __be64 port_rcv_data; - __be64 port_xmit_packets; - __be64 port_rcv_packets; - __be64 port_unicast_xmit_packets; - __be64 port_unicast_rcv_packets; - __be64 port_multicast_xmit_packets; - __be64 port_multicast_rcv_packets; -} __attribute__ ((packed)); - -#define IB_PMA_SELX_PORT_XMIT_DATA cpu_to_be16(0x0001) -#define IB_PMA_SELX_PORT_RCV_DATA cpu_to_be16(0x0002) -#define IB_PMA_SELX_PORT_XMIT_PACKETS cpu_to_be16(0x0004) -#define IB_PMA_SELX_PORT_RCV_PACKETS cpu_to_be16(0x0008) -#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS cpu_to_be16(0x0010) -#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS cpu_to_be16(0x0020) -#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS cpu_to_be16(0x0040) -#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS cpu_to_be16(0x0080) +/* + * Congestion control class attributes + */ +#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001) +#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002) +#define IB_CC_ATTR_CONGESTION_INFO cpu_to_be16(0x0011) +#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012) +#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013) +#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014) +#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015) +#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016) +#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017) +#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018) + +/* generalizations for threshold values */ +#define IB_CC_THRESHOLD_NONE 0x0 +#define IB_CC_THRESHOLD_MIN 0x1 +#define IB_CC_THRESHOLD_MAX 0xf + +/* CCA MAD header constants */ +#define IB_CC_MAD_LOGDATA_LEN 32 +#define IB_CC_MAD_MGMTDATA_LEN 192 + +struct ib_cc_mad { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 cckey; + + /* For CongestionLog attribute only */ + u8 log_data[IB_CC_MAD_LOGDATA_LEN]; + + u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN]; +} __packed; + +/* + * Congestion Control class portinfo capability mask bits + */ +#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0) +#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1) +#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2) +#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8) + +struct ib_cc_classportinfo_attr { + u8 base_version; + u8 class_version; + __be16 cap_mask; + u8 reserved[3]; + u8 resp_time_value; /* only lower 5 bits */ + union ib_gid redirect_gid; + __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */ + __be16 redirect_lid; + __be16 redirect_pkey; + __be32 redirect_qp; /* only lower 24 bits */ + __be32 redirect_qkey; + union ib_gid trap_gid; + __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */ + __be16 trap_lid; + __be16 trap_pkey; + __be32 trap_hl_qp; /* 8, 24 bits respectively */ + __be32 trap_qkey; +} __packed; + +/* Congestion control traps */ +#define IB_CC_TRAP_KEY_VIOLATION 0x0000 + +struct ib_cc_trap_key_violation_attr { + __be16 source_lid; + u8 method; + u8 reserved1; + __be16 attrib_id; + __be32 attrib_mod; + __be32 qp; + __be64 cckey; + u8 sgid[16]; + u8 padding[24]; +} __packed; + +/* Congestion info flags */ +#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1 +#define IB_CC_TABLE_CAP_DEFAULT 31 + +struct ib_cc_info_attr { + __be16 congestion_info; + u8 control_table_cap; /* Multiple of 64 entry unit CCTs */ +} __packed; + +struct ib_cc_key_info_attr { + __be64 cckey; + u8 protect; + __be16 lease_period; + __be16 violations; +} __packed; + +#define IB_CC_CL_CA_LOGEVENTS_LEN 208 + +struct ib_cc_log_attr { + u8 log_type; + u8 congestion_flags; + __be16 threshold_event_counter; + __be16 threshold_congestion_event_map; + __be16 current_time_stamp; + u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN]; +} __packed; + +#define IB_CC_CLEC_SERVICETYPE_RC 0x0 +#define IB_CC_CLEC_SERVICETYPE_UC 0x1 +#define IB_CC_CLEC_SERVICETYPE_RD 0x2 +#define IB_CC_CLEC_SERVICETYPE_UD 0x3 + +struct ib_cc_log_event { + u8 local_qp_cn_entry; + u8 remote_qp_number_cn_entry[3]; + u8 sl_cn_entry:4; + u8 service_type_cn_entry:4; + __be32 remote_lid_cn_entry; + __be32 timestamp_cn_entry; +} __packed; + +/* Sixteen congestion entries */ +#define IB_CC_CCS_ENTRIES 16 + +/* Port control flags */ +#define IB_CC_CCS_PC_SL_BASED 0x01 + +struct ib_cc_congestion_entry { + u8 ccti_increase; + __be16 ccti_timer; + u8 trigger_threshold; + u8 ccti_min; /* min CCTI for cc table */ +} __packed; + +struct ib_cc_congestion_entry_shadow { + u8 ccti_increase; + u16 ccti_timer; + u8 trigger_threshold; + u8 ccti_min; /* min CCTI for cc table */ +} __packed; + +struct ib_cc_congestion_setting_attr { + __be16 port_control; + __be16 control_map; + struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES]; +} __packed; + +struct ib_cc_congestion_setting_attr_shadow { + u16 port_control; + u16 control_map; + struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES]; +} __packed; + +#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1 +#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1 + +/* 64 Congestion Control table entries in a single MAD */ +#define IB_CCT_ENTRIES 64 +#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2) + +struct ib_cc_table_entry { + __be16 entry; /* shift:2, multiplier:14 */ +}; + +struct ib_cc_table_entry_shadow { + u16 entry; /* shift:2, multiplier:14 */ +}; + +struct ib_cc_table_attr { + __be16 ccti_limit; /* max CCTI for cc table */ + struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES]; +} __packed; + +struct ib_cc_table_attr_shadow { + u16 ccti_limit; /* max CCTI for cc table */ + struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES]; +} __packed; + +#define CC_TABLE_SHADOW_MAX \ + (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES) + +struct cc_table_shadow { + u16 ccti_last_entry; + struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX]; +} __packed; /* * The PortSamplesControl.CounterMasks field is an array of 3 bit fields @@ -371,3 +427,5 @@ struct ib_pma_portcounters_ext { COUNTER_MASK(1, 2) | \ COUNTER_MASK(1, 3) | \ COUNTER_MASK(1, 4)) + +#endif /* _QIB_MAD_H */ diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index 5f95f0f6385..9bbb55347cc 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -39,7 +39,6 @@ /* Fast memory region */ struct qib_fmr { struct ib_fmr ibfmr; - u8 page_shift; struct qib_mregion mr; /* must be last */ }; @@ -48,6 +47,43 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) return container_of(ibfmr, struct qib_fmr, ibfmr); } +static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd, + int count) +{ + int m, i = 0; + int rval = 0; + + m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; + for (; i < m; i++) { + mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL); + if (!mr->map[i]) + goto bail; + } + mr->mapsz = m; + init_completion(&mr->comp); + /* count returning the ptr to user */ + atomic_set(&mr->refcount, 1); + mr->pd = pd; + mr->max_segs = count; +out: + return rval; +bail: + while (i) + kfree(mr->map[--i]); + rval = -ENOMEM; + goto out; +} + +static void deinit_qib_mregion(struct qib_mregion *mr) +{ + int i = mr->mapsz; + + mr->mapsz = 0; + while (i) + kfree(mr->map[--i]); +} + + /** * qib_get_dma_mr - get a DMA memory region * @pd: protection domain for this memory region @@ -59,10 +95,9 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) */ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) { - struct qib_ibdev *dev = to_idev(pd->device); - struct qib_mr *mr; + struct qib_mr *mr = NULL; struct ib_mr *ret; - unsigned long flags; + int rval; if (to_ipd(pd)->user) { ret = ERR_PTR(-EPERM); @@ -75,60 +110,64 @@ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) goto bail; } - mr->mr.access_flags = acc; - atomic_set(&mr->mr.refcount, 0); + rval = init_qib_mregion(&mr->mr, pd, 0); + if (rval) { + ret = ERR_PTR(rval); + goto bail; + } - spin_lock_irqsave(&dev->lk_table.lock, flags); - if (!dev->dma_mr) - dev->dma_mr = &mr->mr; - spin_unlock_irqrestore(&dev->lk_table.lock, flags); + rval = qib_alloc_lkey(&mr->mr, 1); + if (rval) { + ret = ERR_PTR(rval); + goto bail_mregion; + } + + mr->mr.access_flags = acc; ret = &mr->ibmr; +done: + return ret; +bail_mregion: + deinit_qib_mregion(&mr->mr); bail: - return ret; + kfree(mr); + goto done; } -static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table) +static struct qib_mr *alloc_mr(int count, struct ib_pd *pd) { struct qib_mr *mr; - int m, i = 0; + int rval = -ENOMEM; + int m; /* Allocate struct plus pointers to first level page tables. */ m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; - mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); + mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); if (!mr) - goto done; - - /* Allocate first level page tables. */ - for (; i < m; i++) { - mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL); - if (!mr->mr.map[i]) - goto bail; - } - mr->mr.mapsz = m; - mr->mr.max_segs = count; + goto bail; + rval = init_qib_mregion(&mr->mr, pd, count); + if (rval) + goto bail; /* * ib_reg_phys_mr() will initialize mr->ibmr except for * lkey and rkey. */ - if (!qib_alloc_lkey(lk_table, &mr->mr)) - goto bail; + rval = qib_alloc_lkey(&mr->mr, 0); + if (rval) + goto bail_mregion; mr->ibmr.lkey = mr->mr.lkey; mr->ibmr.rkey = mr->mr.lkey; +done: + return mr; - atomic_set(&mr->mr.refcount, 0); - goto done; - +bail_mregion: + deinit_qib_mregion(&mr->mr); bail: - while (i) - kfree(mr->mr.map[--i]); kfree(mr); - mr = NULL; - -done: - return mr; + mr = ERR_PTR(rval); + goto done; } /** @@ -148,19 +187,15 @@ struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, int n, m, i; struct ib_mr *ret; - mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table); - if (mr == NULL) { - ret = ERR_PTR(-ENOMEM); + mr = alloc_mr(num_phys_buf, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; goto bail; } - mr->mr.pd = pd; mr->mr.user_base = *iova_start; mr->mr.iova = *iova_start; - mr->mr.length = 0; - mr->mr.offset = 0; mr->mr.access_flags = acc; - mr->umem = NULL; m = 0; n = 0; @@ -186,7 +221,6 @@ bail: * @pd: protection domain for this memory region * @start: starting userspace address * @length: length of region to register - * @virt_addr: virtual address to use (from HCA's point of view) * @mr_access_flags: access flags for this memory region * @udata: unused by the QLogic_IB driver * @@ -198,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct qib_mr *mr; struct ib_umem *umem; - struct ib_umem_chunk *chunk; - int n, m, i; + struct scatterlist *sg; + int n, m, entry; struct ib_mr *ret; if (length == 0) { @@ -212,18 +246,15 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *) umem; - n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - n += chunk->nents; + n = umem->nmap; - mr = alloc_mr(n, &to_idev(pd->device)->lk_table); - if (!mr) { - ret = ERR_PTR(-ENOMEM); + mr = alloc_mr(n, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; ib_umem_release(umem); goto bail; } - mr->mr.pd = pd; mr->mr.user_base = start; mr->mr.iova = virt_addr; mr->mr.length = length; @@ -231,13 +262,14 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.access_flags = mr_access_flags; mr->umem = umem; + if (is_power_of_2(umem->page_size)) + mr->mr.page_shift = ilog2(umem->page_size); m = 0; n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) { - for (i = 0; i < chunk->nents; i++) { + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { void *vaddr; - vaddr = page_address(sg_page(&chunk->page_list[i])); + vaddr = page_address(sg_page(sg)); if (!vaddr) { ret = ERR_PTR(-EINVAL); goto bail; @@ -249,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, m++; n = 0; } - } } ret = &mr->ibmr; @@ -269,21 +300,25 @@ bail: int qib_dereg_mr(struct ib_mr *ibmr) { struct qib_mr *mr = to_imr(ibmr); - struct qib_ibdev *dev = to_idev(ibmr->device); - int ret; - int i; - - ret = qib_free_lkey(dev, &mr->mr); - if (ret) - return ret; - - i = mr->mr.mapsz; - while (i) - kfree(mr->mr.map[--i]); + int ret = 0; + unsigned long timeout; + + qib_free_lkey(&mr->mr); + + qib_put_mr(&mr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&mr->mr.comp, + 5 * HZ); + if (!timeout) { + qib_get_mr(&mr->mr); + ret = -EBUSY; + goto out; + } + deinit_qib_mregion(&mr->mr); if (mr->umem) ib_umem_release(mr->umem); kfree(mr); - return 0; +out: + return ret; } /* @@ -296,17 +331,9 @@ struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { struct qib_mr *mr; - mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table); - if (mr == NULL) - return ERR_PTR(-ENOMEM); - - mr->mr.pd = pd; - mr->mr.user_base = 0; - mr->mr.iova = 0; - mr->mr.length = 0; - mr->mr.offset = 0; - mr->mr.access_flags = 0; - mr->umem = NULL; + mr = alloc_mr(max_page_list_len, pd); + if (IS_ERR(mr)) + return (struct ib_mr *)mr; return &mr->ibmr; } @@ -320,11 +347,11 @@ qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) if (size > PAGE_SIZE) return ERR_PTR(-EINVAL); - pl = kmalloc(sizeof *pl, GFP_KERNEL); + pl = kzalloc(sizeof *pl, GFP_KERNEL); if (!pl) return ERR_PTR(-ENOMEM); - pl->page_list = kmalloc(size, GFP_KERNEL); + pl->page_list = kzalloc(size, GFP_KERNEL); if (!pl->page_list) goto err_free; @@ -353,57 +380,47 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr) { struct qib_fmr *fmr; - int m, i = 0; + int m; struct ib_fmr *ret; + int rval = -ENOMEM; /* Allocate struct plus pointers to first level page tables. */ m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ; - fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); + fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); if (!fmr) goto bail; - /* Allocate first level page tables. */ - for (; i < m; i++) { - fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0], - GFP_KERNEL); - if (!fmr->mr.map[i]) - goto bail; - } - fmr->mr.mapsz = m; + rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages); + if (rval) + goto bail; /* * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & * rkey. */ - if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr)) - goto bail; + rval = qib_alloc_lkey(&fmr->mr, 0); + if (rval) + goto bail_mregion; fmr->ibfmr.rkey = fmr->mr.lkey; fmr->ibfmr.lkey = fmr->mr.lkey; /* * Resources are allocated but no valid mapping (RKEY can't be * used). */ - fmr->mr.pd = pd; - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - fmr->mr.offset = 0; fmr->mr.access_flags = mr_access_flags; fmr->mr.max_segs = fmr_attr->max_pages; - fmr->page_shift = fmr_attr->page_shift; + fmr->mr.page_shift = fmr_attr->page_shift; - atomic_set(&fmr->mr.refcount, 0); ret = &fmr->ibfmr; - goto done; +done: + return ret; +bail_mregion: + deinit_qib_mregion(&fmr->mr); bail: - while (i) - kfree(fmr->mr.map[--i]); kfree(fmr); - ret = ERR_PTR(-ENOMEM); - -done: - return ret; + ret = ERR_PTR(rval); + goto done; } /** @@ -426,7 +443,8 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, u32 ps; int ret; - if (atomic_read(&fmr->mr.refcount)) + i = atomic_read(&fmr->mr.refcount); + if (i > 2) return -EBUSY; if (list_len > fmr->mr.max_segs) { @@ -437,7 +455,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, spin_lock_irqsave(&rkt->lock, flags); fmr->mr.user_base = iova; fmr->mr.iova = iova; - ps = 1 << fmr->page_shift; + ps = 1 << fmr->mr.page_shift; fmr->mr.length = list_len * ps; m = 0; n = 0; @@ -488,16 +506,27 @@ int qib_unmap_fmr(struct list_head *fmr_list) int qib_dealloc_fmr(struct ib_fmr *ibfmr) { struct qib_fmr *fmr = to_ifmr(ibfmr); - int ret; - int i; + int ret = 0; + unsigned long timeout; + + qib_free_lkey(&fmr->mr); + qib_put_mr(&fmr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&fmr->mr.comp, + 5 * HZ); + if (!timeout) { + qib_get_mr(&fmr->mr); + ret = -EBUSY; + goto out; + } + deinit_qib_mregion(&fmr->mr); + kfree(fmr); +out: + return ret; +} - ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr); - if (ret) - return ret; +void mr_rcu_callback(struct rcu_head *list) +{ + struct qib_mregion *mr = container_of(list, struct qib_mregion, list); - i = fmr->mr.mapsz; - while (i) - kfree(fmr->mr.map[--i]); - kfree(fmr); - return 0; + complete(&mr->comp); } diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 48b6674cbc4..61a0046efb7 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -35,6 +35,7 @@ #include <linux/delay.h> #include <linux/vmalloc.h> #include <linux/aer.h> +#include <linux/module.h> #include "qib.h" @@ -50,8 +51,8 @@ * file calls, even though this violates some * expectations of harmlessness. */ -static int qib_tune_pcie_caps(struct qib_devdata *); -static int qib_tune_pcie_coalesce(struct qib_devdata *); +static void qib_tune_pcie_caps(struct qib_devdata *); +static void qib_tune_pcie_coalesce(struct qib_devdata *); /* * Do all the common PCIe setup and initialization. @@ -193,31 +194,50 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) } static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, - struct msix_entry *msix_entry) + struct qib_msix_entry *qib_msix_entry) { int ret; - u32 tabsize = 0; - u16 msix_flags; - - pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags); - tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE); - if (tabsize > *msixcnt) - tabsize = *msixcnt; - ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); - if (ret > 0) { - tabsize = ret; - ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); - } - if (ret) { - qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, " - "falling back to INTx\n", tabsize, ret); - tabsize = 0; - } - *msixcnt = tabsize; - - if (ret) - qib_enable_intx(dd->pcidev); - + int nvec = *msixcnt; + struct msix_entry *msix_entry; + int i; + + ret = pci_msix_vec_count(dd->pcidev); + if (ret < 0) + goto do_intx; + + nvec = min(nvec, ret); + + /* We can't pass qib_msix_entry array to qib_msix_setup + * so use a dummy msix_entry array and copy the allocated + * irq back to the qib_msix_entry array. */ + msix_entry = kmalloc(nvec * sizeof(*msix_entry), GFP_KERNEL); + if (!msix_entry) + goto do_intx; + + for (i = 0; i < nvec; i++) + msix_entry[i] = qib_msix_entry[i].msix; + + ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec); + if (ret < 0) + goto free_msix_entry; + else + nvec = ret; + + for (i = 0; i < nvec; i++) + qib_msix_entry[i].msix = msix_entry[i]; + + kfree(msix_entry); + *msixcnt = nvec; + return; + +free_msix_entry: + kfree(msix_entry); + +do_intx: + qib_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, " + "falling back to INTx\n", nvec, ret); + *msixcnt = 0; + qib_enable_intx(dd->pcidev); } /** @@ -233,8 +253,9 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos) ret = pci_enable_msi(pdev); if (ret) - qib_dev_err(dd, "pci_enable_msi failed: %d, " - "interrupts may not work\n", ret); + qib_dev_err(dd, + "pci_enable_msi failed: %d, interrupts may not work\n", + ret); /* continue even if it fails, we may still be OK... */ pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, @@ -250,13 +271,12 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos) } int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, - struct msix_entry *entry) + struct qib_msix_entry *entry) { u16 linkstat, speed; - int pos = 0, pose, ret = 1; + int pos = 0, ret = 1; - pose = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP); - if (!pose) { + if (!pci_is_pcie(dd->pcidev)) { qib_dev_err(dd, "Can't find PCI Express capability!\n"); /* set up something... */ dd->lbus_width = 1; @@ -264,12 +284,12 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, goto bail; } - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSIX); + pos = dd->pcidev->msix_cap; if (nent && *nent && pos) { qib_msix_setup(dd, pos, nent, entry); ret = 0; /* did it, either MSIx or INTx */ } else { - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); + pos = dd->pcidev->msi_cap; if (pos) ret = qib_msi_setup(dd, pos); else @@ -278,7 +298,7 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, if (!pos) qib_enable_intx(dd->pcidev); - pci_read_config_word(dd->pcidev, pose + PCI_EXP_LNKSTA, &linkstat); + pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); /* * speed is bits 0-3, linkwidth is bits 4-8 * no defines for them in headers @@ -338,10 +358,10 @@ int qib_reinit_intr(struct qib_devdata *dd) if (!dd->msi_lo) goto bail; - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); + pos = dd->pcidev->msi_cap; if (!pos) { - qib_dev_err(dd, "Can't find MSI capability, " - "can't restore MSI settings\n"); + qib_dev_err(dd, + "Can't find MSI capability, can't restore MSI settings\n"); ret = 0; /* nothing special for MSIx, just MSI */ goto bail; @@ -407,7 +427,7 @@ void qib_enable_intx(struct pci_dev *pdev) if (new != cw) pci_write_config_word(pdev, PCI_COMMAND, new); - pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); + pos = pdev->msi_cap; if (pos) { /* then turn off MSI */ pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw); @@ -415,7 +435,7 @@ void qib_enable_intx(struct pci_dev *pdev) if (new != cw) pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new); } - pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); + pos = pdev->msix_cap; if (pos) { /* then turn off MSIx */ pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw); @@ -453,34 +473,10 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline); r = pci_enable_device(dd->pcidev); if (r) - qib_dev_err(dd, "pci_enable_device failed after " - "reset: %d\n", r); -} - -/* code to adjust PCIe capabilities. */ - -static int fld2val(int wd, int mask) -{ - int lsbmask; - - if (!mask) - return 0; - wd &= mask; - lsbmask = mask ^ (mask & (mask - 1)); - wd /= lsbmask; - return wd; + qib_dev_err(dd, + "pci_enable_device failed after reset: %d\n", r); } -static int val2fld(int wd, int mask) -{ - int lsbmask; - - if (!mask) - return 0; - lsbmask = mask ^ (mask & (mask - 1)); - wd *= lsbmask; - return wd; -} static int qib_pcie_coalesce; module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO); @@ -492,28 +488,26 @@ MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets"); * of these chipsets, with some BIOS settings, and enabling it on those * systems may result in the system crashing, and/or data corruption. */ -static int qib_tune_pcie_coalesce(struct qib_devdata *dd) +static void qib_tune_pcie_coalesce(struct qib_devdata *dd) { int r; struct pci_dev *parent; - int ppos; u16 devid; u32 mask, bits, val; if (!qib_pcie_coalesce) - return 0; + return; /* Find out supported and configured values for parent (root) */ parent = dd->pcidev->bus->self; if (parent->bus->parent) { qib_devinfo(dd->pcidev, "Parent not root\n"); - return 1; + return; } - ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); - if (!ppos) - return 1; + if (!pci_is_pcie(parent)) + return; if (parent->vendor != 0x8086) - return 1; + return; /* * - bit 12: Max_rdcmp_Imt_EN: need to set to 1 @@ -526,11 +520,8 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd) */ devid = parent->device; if (devid >= 0x25e2 && devid <= 0x25fa) { - u8 rev; - /* 5000 P/V/X/Z */ - pci_read_config_byte(parent, PCI_REVISION_ID, &rev); - if (rev <= 0xb2) + if (parent->revision <= 0xb2) bits = 1U << 10; else bits = 7U << 10; @@ -549,13 +540,12 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd) mask = (3U << 24) | (7U << 10); } else { /* not one of the chipsets that we know about */ - return 1; + return; } pci_read_config_dword(parent, 0x48, &val); val &= ~mask; val |= bits; r = pci_write_config_dword(parent, 0x48, val); - return 0; } /* @@ -564,62 +554,46 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd) */ static int qib_pcie_caps; module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO); -MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (4lsb), ReadReq (D4..7)"); +MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); -static int qib_tune_pcie_caps(struct qib_devdata *dd) +static void qib_tune_pcie_caps(struct qib_devdata *dd) { - int ret = 1; /* Assume the worst */ struct pci_dev *parent; - int ppos, epos; - u16 pcaps, pctl, ecaps, ectl; - int rc_sup, ep_sup; - int rc_cur, ep_cur; + u16 rc_mpss, rc_mps, ep_mpss, ep_mps; + u16 rc_mrrs, ep_mrrs, max_mrrs; /* Find out supported and configured values for parent (root) */ parent = dd->pcidev->bus->self; - if (parent->bus->parent) { + if (!pci_is_root_bus(parent->bus)) { qib_devinfo(dd->pcidev, "Parent not root\n"); - goto bail; + return; } - ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); - if (ppos) { - pci_read_config_word(parent, ppos + PCI_EXP_DEVCAP, &pcaps); - pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); - } else - goto bail; + + if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev)) + return; + + rc_mpss = parent->pcie_mpss; + rc_mps = ffs(pcie_get_mps(parent)) - 8; /* Find out supported and configured values for endpoint (us) */ - epos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP); - if (epos) { - pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCAP, &ecaps); - pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, &ectl); - } else - goto bail; - ret = 0; - /* Find max payload supported by root, endpoint */ - rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD); - ep_sup = fld2val(ecaps, PCI_EXP_DEVCAP_PAYLOAD); - if (rc_sup > ep_sup) - rc_sup = ep_sup; + ep_mpss = dd->pcidev->pcie_mpss; + ep_mps = ffs(pcie_get_mps(dd->pcidev)) - 8; - rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_PAYLOAD); - ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD); + /* Find max payload supported by root, endpoint */ + if (rc_mpss > ep_mpss) + rc_mpss = ep_mpss; /* If Supported greater than limit in module param, limit it */ - if (rc_sup > (qib_pcie_caps & 7)) - rc_sup = qib_pcie_caps & 7; + if (rc_mpss > (qib_pcie_caps & 7)) + rc_mpss = qib_pcie_caps & 7; /* If less than (allowed, supported), bump root payload */ - if (rc_sup > rc_cur) { - rc_cur = rc_sup; - pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) | - val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD); - pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl); + if (rc_mpss > rc_mps) { + rc_mps = rc_mpss; + pcie_set_mps(parent, 128 << rc_mps); } /* If less than (allowed, supported), bump endpoint payload */ - if (rc_sup > ep_cur) { - ep_cur = rc_sup; - ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) | - val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD); - pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl); + if (rc_mpss > ep_mps) { + ep_mps = rc_mpss; + pcie_set_mps(dd->pcidev, 128 << ep_mps); } /* @@ -627,26 +601,22 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd) * No field for max supported, but PCIe spec limits it to 4096, * which is code '5' (log2(4096) - 7) */ - rc_sup = 5; - if (rc_sup > ((qib_pcie_caps >> 4) & 7)) - rc_sup = (qib_pcie_caps >> 4) & 7; - rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ); - ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ); - - if (rc_sup > rc_cur) { - rc_cur = rc_sup; - pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) | - val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ); - pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl); + max_mrrs = 5; + if (max_mrrs > ((qib_pcie_caps >> 4) & 7)) + max_mrrs = (qib_pcie_caps >> 4) & 7; + + max_mrrs = 128 << max_mrrs; + rc_mrrs = pcie_get_readrq(parent); + ep_mrrs = pcie_get_readrq(dd->pcidev); + + if (max_mrrs > rc_mrrs) { + rc_mrrs = max_mrrs; + pcie_set_readrq(parent, rc_mrrs); } - if (rc_sup > ep_cur) { - ep_cur = rc_sup; - ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) | - val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ); - pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl); + if (max_mrrs > ep_mrrs) { + ep_mrrs = max_mrrs; + pcie_set_readrq(dd->pcidev, ep_mrrs); } -bail: - return ret; } /* End of PCIe capability tuning */ @@ -702,15 +672,16 @@ qib_pci_mmio_enabled(struct pci_dev *pdev) if (words == ~0ULL) ret = PCI_ERS_RESULT_NEED_RESET; } - qib_devinfo(pdev, "QIB mmio_enabled function called, " - "read wordscntr %Lx, returning %d\n", words, ret); + qib_devinfo(pdev, + "QIB mmio_enabled function called, read wordscntr %Lx, returning %d\n", + words, ret); return ret; } static pci_ers_result_t qib_pci_slot_reset(struct pci_dev *pdev) { - qib_devinfo(pdev, "QIB link_reset function called, ignored\n"); + qib_devinfo(pdev, "QIB slot_reset function called, ignored\n"); return PCI_ERS_RESULT_CAN_RECOVER; } @@ -735,7 +706,7 @@ qib_pci_resume(struct pci_dev *pdev) qib_init(dd, 1); /* same as re-init after reset */ } -struct pci_error_handlers qib_pci_err_handler = { +const struct pci_error_handlers qib_pci_err_handler = { .error_detected = qib_pci_error_detected, .mmio_enabled = qib_pci_mmio_enabled, .link_reset = qib_pci_link_reset, diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 6c39851d2de..7fcc150d603 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -34,6 +34,10 @@ #include <linux/err.h> #include <linux/vmalloc.h> +#include <linux/jhash.h> +#ifdef CONFIG_DEBUG_FS +#include <linux/seq_file.h> +#endif #include "qib.h" @@ -48,13 +52,12 @@ static inline unsigned mk_qpn(struct qib_qpn_table *qpt, static inline unsigned find_next_offset(struct qib_qpn_table *qpt, struct qpn_map *map, unsigned off, - unsigned r) + unsigned n) { if (qpt->mask) { off++; - if ((off & qpt->mask) >> 1 != r) - off = ((off & qpt->mask) ? - (off | qpt->mask) + 1 : off) | (r << 1); + if (((off & qpt->mask) >> 1) >= n) + off = (off | qpt->mask) + 2; } else off = find_next_zero_bit(map->page, BITS_PER_PAGE, off); return off; @@ -123,7 +126,6 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, u32 i, offset, max_scan, qpn; struct qpn_map *map; u32 ret; - int r; if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -139,15 +141,11 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, goto bail; } - r = smp_processor_id(); - if (r >= dd->n_krcv_queues) - r %= dd->n_krcv_queues; - qpn = qpt->last + 1; + qpn = qpt->last + 2; if (qpn >= QPN_MAX) qpn = 2; - if (qpt->mask && ((qpn & qpt->mask) >> 1) != r) - qpn = ((qpn & qpt->mask) ? (qpn | qpt->mask) + 1 : qpn) | - (r << 1); + if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues) + qpn = (qpn | qpt->mask) + 2; offset = qpn & BITS_PER_PAGE_MASK; map = &qpt->map[qpn / BITS_PER_PAGE]; max_scan = qpt->nmaps - !offset; @@ -163,7 +161,8 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, ret = qpn; goto bail; } - offset = find_next_offset(qpt, map, offset, r); + offset = find_next_offset(qpt, map, offset, + dd->n_krcv_queues); qpn = mk_qpn(qpt, map, offset); /* * This test differs from alloc_pidmap(). @@ -183,13 +182,13 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, if (qpt->nmaps == QPNMAP_ENTRIES) break; map = &qpt->map[qpt->nmaps++]; - offset = qpt->mask ? (r << 1) : 0; + offset = 0; } else if (map < &qpt->map[qpt->nmaps]) { ++map; - offset = qpt->mask ? (r << 1) : 0; + offset = 0; } else { map = &qpt->map[0]; - offset = qpt->mask ? (r << 1) : 2; + offset = 2; } qpn = mk_qpn(qpt, map, offset); } @@ -209,6 +208,13 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn) clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); } +static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn) +{ + return jhash_1word(qpn, dev->qp_rnd) & + (dev->qp_table_size - 1); +} + + /* * Put the QP into the hash table. * The hash table holds a reference to the QP. @@ -216,20 +222,20 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn) static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) { struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - unsigned n = qp->ibqp.qp_num % dev->qp_table_size; unsigned long flags; + unsigned n = qpn_hash(dev, qp->ibqp.qp_num); + atomic_inc(&qp->refcount); spin_lock_irqsave(&dev->qpt_lock, flags); if (qp->ibqp.qp_num == 0) - ibp->qp0 = qp; + rcu_assign_pointer(ibp->qp0, qp); else if (qp->ibqp.qp_num == 1) - ibp->qp1 = qp; + rcu_assign_pointer(ibp->qp1, qp); else { qp->next = dev->qp_table[n]; - dev->qp_table[n] = qp; + rcu_assign_pointer(dev->qp_table[n], qp); } - atomic_inc(&qp->refcount); spin_unlock_irqrestore(&dev->qpt_lock, flags); } @@ -241,29 +247,41 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) { struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct qib_qp *q, **qpp; + unsigned n = qpn_hash(dev, qp->ibqp.qp_num); unsigned long flags; - - qpp = &dev->qp_table[qp->ibqp.qp_num % dev->qp_table_size]; + int removed = 1; spin_lock_irqsave(&dev->qpt_lock, flags); - if (ibp->qp0 == qp) { - ibp->qp0 = NULL; - atomic_dec(&qp->refcount); - } else if (ibp->qp1 == qp) { - ibp->qp1 = NULL; - atomic_dec(&qp->refcount); - } else - for (; (q = *qpp) != NULL; qpp = &q->next) + if (rcu_dereference_protected(ibp->qp0, + lockdep_is_held(&dev->qpt_lock)) == qp) { + rcu_assign_pointer(ibp->qp0, NULL); + } else if (rcu_dereference_protected(ibp->qp1, + lockdep_is_held(&dev->qpt_lock)) == qp) { + rcu_assign_pointer(ibp->qp1, NULL); + } else { + struct qib_qp *q; + struct qib_qp __rcu **qpp; + + removed = 0; + qpp = &dev->qp_table[n]; + for (; (q = rcu_dereference_protected(*qpp, + lockdep_is_held(&dev->qpt_lock))) != NULL; + qpp = &q->next) if (q == qp) { - *qpp = qp->next; - qp->next = NULL; - atomic_dec(&qp->refcount); + rcu_assign_pointer(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&dev->qpt_lock))); + removed = 1; break; } + } spin_unlock_irqrestore(&dev->qpt_lock, flags); + if (removed) { + synchronize_rcu(); + atomic_dec(&qp->refcount); + } } /** @@ -285,21 +303,26 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) if (!qib_mcast_tree_empty(ibp)) qp_inuse++; - if (ibp->qp0) + rcu_read_lock(); + if (rcu_dereference(ibp->qp0)) qp_inuse++; - if (ibp->qp1) + if (rcu_dereference(ibp->qp1)) qp_inuse++; + rcu_read_unlock(); } spin_lock_irqsave(&dev->qpt_lock, flags); for (n = 0; n < dev->qp_table_size; n++) { - qp = dev->qp_table[n]; - dev->qp_table[n] = NULL; + qp = rcu_dereference_protected(dev->qp_table[n], + lockdep_is_held(&dev->qpt_lock)); + rcu_assign_pointer(dev->qp_table[n], NULL); - for (; qp; qp = qp->next) + for (; qp; qp = rcu_dereference_protected(qp->next, + lockdep_is_held(&dev->qpt_lock))) qp_inuse++; } spin_unlock_irqrestore(&dev->qpt_lock, flags); + synchronize_rcu(); return qp_inuse; } @@ -314,25 +337,28 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) */ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) { - struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; - unsigned long flags; - struct qib_qp *qp; + struct qib_qp *qp = NULL; - spin_lock_irqsave(&dev->qpt_lock, flags); + rcu_read_lock(); + if (unlikely(qpn <= 1)) { + if (qpn == 0) + qp = rcu_dereference(ibp->qp0); + else + qp = rcu_dereference(ibp->qp1); + if (qp) + atomic_inc(&qp->refcount); + } else { + struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; + unsigned n = qpn_hash(dev, qpn); - if (qpn == 0) - qp = ibp->qp0; - else if (qpn == 1) - qp = ibp->qp1; - else - for (qp = dev->qp_table[qpn % dev->qp_table_size]; qp; - qp = qp->next) - if (qp->ibqp.qp_num == qpn) + for (qp = rcu_dereference(dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) + if (qp->ibqp.qp_num == qpn) { + atomic_inc(&qp->refcount); break; - if (qp) - atomic_inc(&qp->refcount); - - spin_unlock_irqrestore(&dev->qpt_lock, flags); + } + } + rcu_read_unlock(); return qp; } @@ -393,18 +419,9 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) unsigned n; if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) - while (qp->s_rdma_read_sge.num_sge) { - atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (clr_sends) { while (qp->s_last != qp->s_head) { @@ -414,7 +431,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || @@ -424,7 +441,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) qp->s_last = 0; } if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } } @@ -437,7 +454,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } } @@ -468,6 +485,10 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR); del_timer(&qp->s_timer); } + + if (qp->s_flags & QIB_S_ANY_WAIT_SEND) + qp->s_flags &= ~QIB_S_ANY_WAIT_SEND; + spin_lock(&dev->pending_lock); if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) { qp->s_flags &= ~QIB_S_ANY_WAIT_IO; @@ -478,7 +499,7 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) if (!(qp->s_flags & QIB_S_BUSY)) { qp->s_hdrwords = 0; if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } if (qp->s_tx) { @@ -564,7 +585,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, - attr_mask)) + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) goto inval; if (attr_mask & IB_QP_AV) { @@ -766,8 +787,10 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } } - if (attr_mask & IB_QP_PATH_MTU) + if (attr_mask & IB_QP_PATH_MTU) { qp->path_mtu = pmtu; + qp->pmtu = ib_mtu_enum_to_int(pmtu); + } if (attr_mask & IB_QP_RETRY_CNT) { qp->s_retry_cnt = attr->retry_cnt; @@ -782,8 +805,12 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MIN_RNR_TIMER) qp->r_min_rnr_timer = attr->min_rnr_timer; - if (attr_mask & IB_QP_TIMEOUT) + if (attr_mask & IB_QP_TIMEOUT) { qp->timeout = attr->timeout; + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + } if (attr_mask & IB_QP_QKEY) qp->qkey = attr->qkey; @@ -958,7 +985,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, struct ib_qp *ret; if (init_attr->cap.max_send_sge > ib_qib_max_sges || - init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) { + init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || + init_attr->create_flags) { ret = ERR_PTR(-EINVAL); goto bail; } @@ -1014,6 +1042,15 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, ret = ERR_PTR(-ENOMEM); goto bail_swq; } + RCU_INIT_POINTER(qp->next, NULL); + qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); + if (!qp->s_hdr) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); if (init_attr->srq) sz = 0; else { @@ -1061,7 +1098,6 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; - qp->processor_id = smp_processor_id(); qib_reset_qp(qp, init_attr->qp_type); break; @@ -1133,6 +1169,7 @@ bail_ip: vfree(qp->r_rq.wq); free_qpn(&dev->qpn_table, qp->ibqp.qp_num); bail_qp: + kfree(qp->s_hdr); kfree(qp); bail_swq: vfree(swq); @@ -1188,6 +1225,7 @@ int qib_destroy_qp(struct ib_qp *ibqp) else vfree(qp->r_rq.wq); vfree(qp->s_wq); + kfree(qp->s_hdr); kfree(qp); return 0; } @@ -1253,3 +1291,94 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth) } } } + +#ifdef CONFIG_DEBUG_FS + +struct qib_qp_iter { + struct qib_ibdev *dev; + struct qib_qp *qp; + int n; +}; + +struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) +{ + struct qib_qp_iter *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return NULL; + + iter->dev = dev; + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +int qib_qp_iter_next(struct qib_qp_iter *iter) +{ + struct qib_ibdev *dev = iter->dev; + int n = iter->n; + int ret = 1; + struct qib_qp *pqp = iter->qp; + struct qib_qp *qp; + + rcu_read_lock(); + for (; n < dev->qp_table_size; n++) { + if (pqp) + qp = rcu_dereference(pqp->next); + else + qp = rcu_dereference(dev->qp_table[n]); + pqp = qp; + if (qp) { + if (iter->qp) + atomic_dec(&iter->qp->refcount); + atomic_inc(&qp->refcount); + rcu_read_unlock(); + iter->qp = qp; + iter->n = n; + return 0; + } + } + rcu_read_unlock(); + if (iter->qp) + atomic_dec(&iter->qp->refcount); + return ret; +} + +static const char * const qp_type_str[] = { + "SMI", "GSI", "RC", "UC", "UD", +}; + +void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) +{ + struct qib_swqe *wqe; + struct qib_qp *qp = iter->qp; + + wqe = get_swqe_ptr(qp, qp->s_last); + seq_printf(s, + "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n", + iter->n, + qp->ibqp.qp_num, + qp_type_str[qp->ibqp.qp_type], + qp->state, + wqe->wr.opcode, + qp->s_hdrwords, + qp->s_flags, + atomic_read(&qp->s_dma_busy), + !list_empty(&qp->iowait), + qp->timeout, + wqe->ssn, + qp->s_lsn, + qp->s_last_psn, + qp->s_psn, qp->s_next_psn, + qp->s_sending_psn, qp->s_sending_hpsn, + qp->s_last, qp->s_acked, qp->s_cur, + qp->s_tail, qp->s_head, qp->s_size, + qp->remote_qpn, + qp->remote_ah_attr.dlid); +} + +#endif diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c index 35b3604b691..fa71b1e666c 100644 --- a/drivers/infiniband/hw/qib/qib_qsfp.c +++ b/drivers/infiniband/hw/qib/qib_qsfp.c @@ -273,18 +273,12 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp) int ret; int idx; u16 cks; - u32 mask; u8 peek[4]; /* ensure sane contents on invalid reads, for cable swaps */ memset(cp, 0, sizeof(*cp)); - mask = QSFP_GPIO_MOD_PRS_N; - if (ppd->hw_pidx) - mask <<= QSFP_GPIO_PORT2_SHIFT; - - ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0); - if (ret & mask) { + if (!qib_qsfp_mod_present(ppd)) { ret = -ENODEV; goto bail; } @@ -444,6 +438,19 @@ const char * const qib_qsfp_devtech[16] = { static const char *pwr_codes = "1.5W2.0W2.5W3.5W"; +int qib_qsfp_mod_present(struct qib_pportdata *ppd) +{ + u32 mask; + int ret; + + mask = QSFP_GPIO_MOD_PRS_N << + (ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT); + ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0); + + return !((ret & mask) >> + ((ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT) + 3)); +} + /* * Initialize structures that control access to QSFP. Called once per port * on cards that support QSFP. @@ -452,7 +459,6 @@ void qib_qsfp_init(struct qib_qsfp_data *qd, void (*fevent)(struct work_struct *)) { u32 mask, highs; - int pins; struct qib_devdata *dd = qd->ppd->dd; @@ -474,29 +480,15 @@ void qib_qsfp_init(struct qib_qsfp_data *qd, udelay(20); /* Generous RST dwell */ dd->f_gpio_mod(dd, mask, mask, mask); - /* Spec says module can take up to two seconds! */ - mask = QSFP_GPIO_MOD_PRS_N; - if (qd->ppd->hw_pidx) - mask <<= QSFP_GPIO_PORT2_SHIFT; - - /* Do not try to wait here. Better to let event handle it */ - pins = dd->f_gpio_mod(dd, 0, 0, 0); - if (pins & mask) - goto bail; - /* We see a module, but it may be unwise to look yet. Just schedule */ - qd->t_insert = get_jiffies_64(); - schedule_work(&qd->work); -bail: return; } void qib_qsfp_deinit(struct qib_qsfp_data *qd) { /* - * There is nothing to do here for now. our - * work is scheduled with schedule_work(), and - * flush_scheduled_work() from remove_one will - * block until all work ssetup with schedule_work() + * There is nothing to do here for now. our work is scheduled + * with queue_work(), and flush_workqueue() from remove_one + * will block until all work setup with queue_work() * completes. */ } diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h index 19b527bafd5..91908f533a2 100644 --- a/drivers/infiniband/hw/qib/qib_qsfp.h +++ b/drivers/infiniband/hw/qib/qib_qsfp.h @@ -34,6 +34,7 @@ #define QSFP_DEV 0xA0 #define QSFP_PWR_LAG_MSEC 2000 +#define QSFP_MODPRS_LAG_MSEC 20 /* * Below are masks for various QSFP signals, for Port 1. @@ -79,6 +80,8 @@ extern const char *const qib_qsfp_devtech[16]; /* Active Equalization includes fiber, copper full EQ, and copper near Eq */ #define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1) +/* Active Equalization includes fiber, copper full EQ, and copper far Eq */ +#define QSFP_IS_ACTIVE_FAR(tech) ((0x32FF >> ((tech) >> 4)) & 1) /* Attenuation should be valid for copper other than full/near Eq */ #define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1) /* Length is only valid if technology is "copper" */ @@ -174,11 +177,13 @@ struct qib_qsfp_data { struct qib_pportdata *ppd; struct work_struct work; struct qib_qsfp_cache cache; - u64 t_insert; + unsigned long t_insert; + u8 modpresent; }; extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp); +extern int qib_qsfp_mod_present(struct qib_pportdata *ppd); extern void qib_qsfp_init(struct qib_qsfp_data *qd, void (*fevent)(struct work_struct *)); extern void qib_qsfp_deinit(struct qib_qsfp_data *qd); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 955fb715779..2f2501890c4 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -59,8 +59,7 @@ static void start_timer(struct qib_qp *qp) qp->s_flags |= QIB_S_TIMER; qp->s_timer.function = rc_timeout; /* 4.096 usec. * (1 << qp->timeout) */ - qp->s_timer.expires = jiffies + - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); + qp->s_timer.expires = jiffies + qp->timeout_jiffies; add_timer(&qp->s_timer); } @@ -96,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; if (e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } /* FALLTHROUGH */ @@ -134,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, /* Copy SGE state in case we need to resend */ qp->s_rdma_mr = e->rdma_sge.mr; if (qp->s_rdma_mr) - atomic_inc(&qp->s_rdma_mr->refcount); + qib_get_mr(qp->s_rdma_mr); qp->s_ack_rdma_sge.sge = e->rdma_sge; qp->s_ack_rdma_sge.num_sge = 1; qp->s_cur_sge = &qp->s_ack_rdma_sge; @@ -173,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, qp->s_cur_sge = &qp->s_ack_rdma_sge; qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; if (qp->s_rdma_mr) - atomic_inc(&qp->s_rdma_mr->refcount); + qib_get_mr(qp->s_rdma_mr); len = qp->s_ack_rdma_sge.sge.sge_length; if (len > pmtu) len = pmtu; @@ -239,15 +238,15 @@ int qib_make_rc_req(struct qib_qp *qp) u32 len; u32 bth0; u32 bth2; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; char newreq; unsigned long flags; int ret = 0; int delta; - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* * The lock is needed to synchronize between the sending tasklet, @@ -272,13 +271,9 @@ int qib_make_rc_req(struct qib_qp *qp) goto bail; } wqe = get_swqe_ptr(qp, qp->s_last); - while (qp->s_last != qp->s_acked) { - qib_send_complete(qp, wqe, IB_WC_SUCCESS); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - wqe = get_swqe_ptr(qp, qp->s_last); - } - qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); + /* will get called again */ goto done; } @@ -757,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp) qib_flush_wc(); qib_sendbuf_done(dd, pbufn); - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); goto done; queue_ack: @@ -1005,7 +1000,8 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) * there are still requests that haven't been acked. */ if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail && - !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN))) + !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) && + (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) start_timer(qp); while (qp->s_last != qp->s_acked) { @@ -1016,7 +1012,7 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || @@ -1072,7 +1068,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp, for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || @@ -1407,6 +1403,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, struct qib_ctxtdata *rcd) { struct qib_swqe *wqe; + struct qib_pportdata *ppd = ppd_from_ibp(ibp); enum ib_wc_status status; unsigned long flags; int diff; @@ -1414,7 +1411,32 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, u32 aeth; u64 val; + if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) { + /* + * If ACK'd PSN on SDMA busy list try to make progress to + * reclaim SDMA credits. + */ + if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) && + (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) { + + /* + * If send tasklet not running attempt to progress + * SDMA queue. + */ + if (!(qp->s_flags & QIB_S_BUSY)) { + /* Acquire SDMA Lock */ + spin_lock_irqsave(&ppd->sdma_lock, flags); + /* Invoke sdma make progress */ + qib_sdma_make_progress(ppd); + /* Release SDMA Lock */ + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + } + } + spin_lock_irqsave(&qp->s_lock, flags); + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + goto ack_done; /* Ignore invalid responses. */ if (qib_cmp24(psn, qp->s_next_psn) >= 0) @@ -1492,9 +1514,7 @@ read_middle: * 4.096 usec. * (1 << qp->timeout) */ qp->s_flags |= QIB_S_TIMER; - mod_timer(&qp->s_timer, jiffies + - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / - 1000UL)); + mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); if (qp->s_flags & QIB_S_WAIT_ACK) { qp->s_flags &= ~QIB_S_WAIT_ACK; qib_schedule_send(qp); @@ -1705,12 +1725,12 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, * same request. */ offset = ((psn - e->psn) & QIB_PSN_MASK) * - ib_mtu_enum_to_int(qp->path_mtu); + qp->pmtu; len = be32_to_cpu(reth->length); if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; if (e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } if (len != 0) { @@ -1849,7 +1869,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, u32 psn; u32 pad; struct ib_wc wc; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; int diff; struct ib_reth *reth; unsigned long flags; @@ -1865,10 +1885,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, } opcode = be32_to_cpu(ohdr->bth[0]); - spin_lock_irqsave(&qp->s_lock, flags); if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) - goto sunlock; - spin_unlock_irqrestore(&qp->s_lock, flags); + return; psn = be32_to_cpu(ohdr->bth[2]); opcode >>= 24; @@ -1928,8 +1946,6 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, break; } - memset(&wc, 0, sizeof wc); - if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) { qp->r_flags |= QIB_R_COMM_EST; if (qp->ibqp.event_handler) { @@ -1982,16 +1998,19 @@ send_middle: goto rnr_nak; qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) - goto send_last; - /* FALLTHROUGH */ + goto no_immediate_data; + /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; - /* FALLTHROUGH */ + goto send_last; case OP(SEND_LAST): case OP(RDMA_WRITE_LAST): +no_immediate_data: + wc.wc_flags = 0; + wc.ex.imm_data = 0; send_last: /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; @@ -2005,11 +2024,7 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; qib_copy_sge(&qp->r_sge, data, tlen, 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); qp->r_msn++; if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) break; @@ -2024,6 +2039,11 @@ send_last: wc.src_qp = qp->remote_qpn; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; + /* zero fields that are N/A */ + wc.vendor_err = 0; + wc.pkey_index = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -2062,7 +2082,7 @@ send_last: if (opcode == OP(RDMA_WRITE_FIRST)) goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) - goto send_last; + goto no_immediate_data; ret = qib_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; @@ -2092,7 +2112,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } reth = &ohdr->u.rc.reth; @@ -2164,7 +2184,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; @@ -2186,7 +2206,7 @@ send_last: (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, be64_to_cpu(ateth->compare_data), sdata); - atomic_dec(&qp->r_sge.sge.mr->refcount); + qib_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; e->opcode = opcode; e->sent = 0; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index eb78d9367f0..4c07a8b34ff 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -110,7 +110,7 @@ bad_lkey: while (j) { struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } ss->num_sge = 0; memset(&wc, 0, sizeof(wc)); @@ -260,12 +260,15 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) /* * - * This should be called with the QP s_lock held. + * This should be called with the QP r_lock held. + * + * The s_lock will be acquired around the qib_migrate_qp() call. */ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, int has_grh, struct qib_qp *qp, u32 bth0) { __be64 guid; + unsigned long flags; if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { if (!has_grh) { @@ -295,7 +298,9 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid || ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num) goto err; + spin_lock_irqsave(&qp->s_lock, flags); qib_migrate_qp(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); } else { if (!has_grh) { if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) @@ -496,7 +501,7 @@ again: (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, sdata, wqe->wr.wr.atomic.swap); - atomic_dec(&qp->r_sge.sge.mr->refcount); + qib_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; goto send_comp; @@ -520,7 +525,7 @@ again: sge->sge_length -= len; if (sge->sge_length == 0) { if (!release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--sqp->s_sge.num_sge) *sge = *sqp->s_sge.sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -537,11 +542,7 @@ again: sqp->s_len -= len; } if (release) - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) goto send_comp; @@ -683,17 +684,17 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, nwords = (qp->s_cur_size + extra_bytes) >> 2; lrh0 = QIB_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh, + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, &qp->remote_ah_attr.grh, qp->s_hdrwords, nwords); lrh0 = QIB_LRH_GRH; } lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 | qp->remote_ah_attr.sl << 4; - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | + qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | qp->remote_ah_attr.src_path_bits); bth0 |= qib_get_pkey(ibp, qp->s_pkey_index); bth0 |= extra_bytes << 20; @@ -702,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(bth2); + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } /** @@ -753,7 +755,7 @@ void qib_do_send(struct work_struct *work) * If the packet cannot be sent now, return and * the send tasklet will be woken up later. */ - if (qib_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, + if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) break; /* Record that s_hdr is empty. */ @@ -777,7 +779,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index e9f9f8bc320..911205d3d5a 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -38,6 +38,7 @@ #include <linux/pci.h> #include <linux/delay.h> +#include <linux/module.h> #include <linux/firmware.h> #include "qib.h" @@ -299,7 +300,7 @@ bail: } static void qib_sd_trimdone_monitor(struct qib_devdata *dd, - const char *where) + const char *where) { int ret, chn, baduns; u64 val; @@ -341,15 +342,17 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd, ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0, 0); if (ret < 0) - qib_dev_err(dd, "Failed checking TRIMDONE, chn %d" - " (%s)\n", chn, where); + qib_dev_err(dd, + "Failed checking TRIMDONE, chn %d (%s)\n", + chn, where); if (!(ret & 0x10)) { int probe; baduns |= (1 << chn); - qib_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)." - " (%s)\n", chn, ret, where); + qib_dev_err(dd, + "TRIMDONE cleared on chn %d (%02X). (%s)\n", + chn, ret, where); probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_PGUDP(0), 0, 0); qib_dev_err(dd, "probe is %d (%02X)\n", @@ -369,13 +372,13 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd, /* Read CTRL reg for each channel to check TRIMDONE */ if (baduns & (1 << chn)) { qib_dev_err(dd, - "Reseting TRIMDONE on chn %d (%s)\n", + "Resetting TRIMDONE on chn %d (%s)\n", chn, where); ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0x10, 0x10); if (ret < 0) - qib_dev_err(dd, "Failed re-setting " - "TRIMDONE, chn %d (%s)\n", + qib_dev_err(dd, + "Failed re-setting TRIMDONE, chn %d (%s)\n", chn, where); } } @@ -1143,10 +1146,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - qib_dev_err(dd, "pre-read failed: elt %d," - " addr 0x%X, chnl %d\n", - (sloc & 0xF), - (sloc >> 9) & 0x3f, chnl); + qib_dev_err(dd, + "pre-read failed: elt %d, addr 0x%X, chnl %d\n", + (sloc & 0xF), + (sloc >> 9) & 0x3f, chnl); return ret; } val = (ret & ~mask) | (val & mask); @@ -1156,9 +1159,9 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - qib_dev_err(dd, "Global WR failed: elt %d," - " addr 0x%X, val %02X\n", - (sloc & 0xF), (sloc >> 9) & 0x3f, val); + qib_dev_err(dd, + "Global WR failed: elt %d, addr 0x%X, val %02X\n", + (sloc & 0xF), (sloc >> 9) & 0x3f, val); } return ret; } @@ -1172,11 +1175,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - qib_dev_err(dd, "Write failed: elt %d," - " addr 0x%X, chnl %d, val 0x%02X," - " mask 0x%02X\n", - (sloc & 0xF), (sloc >> 9) & 0x3f, chnl, - val & 0xFF, mask & 0xFF); + qib_dev_err(dd, + "Write failed: elt %d, addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n", + (sloc & 0xF), (sloc >> 9) & 0x3f, chnl, + val & 0xFF, mask & 0xFF); break; } } diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index cad44491320..c6d6a54d2e1 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2007, 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2007 - 2012 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,6 +33,7 @@ #include <linux/spinlock.h> #include <linux/netdevice.h> +#include <linux/moduleparam.h> #include "qib.h" #include "qib_common.h" @@ -275,8 +277,8 @@ static int alloc_sdma(struct qib_pportdata *ppd) GFP_KERNEL); if (!ppd->sdma_descq) { - qib_dev_err(ppd->dd, "failed to allocate SendDMA descriptor " - "FIFO memory\n"); + qib_dev_err(ppd->dd, + "failed to allocate SendDMA descriptor FIFO memory\n"); goto bail; } @@ -284,8 +286,8 @@ static int alloc_sdma(struct qib_pportdata *ppd) ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev, PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL); if (!ppd->sdma_head_dma) { - qib_dev_err(ppd->dd, "failed to allocate SendDMA " - "head memory\n"); + qib_dev_err(ppd->dd, + "failed to allocate SendDMA head memory\n"); goto cleanup_descq; } ppd->sdma_head_dma[0] = 0; @@ -421,8 +423,11 @@ void qib_sdma_intr(struct qib_pportdata *ppd) void __qib_sdma_intr(struct qib_pportdata *ppd) { - if (__qib_sdma_running(ppd)) + if (__qib_sdma_running(ppd)) { qib_sdma_make_progress(ppd); + if (!list_empty(&ppd->sdma_userpending)) + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + } } int qib_setup_sdma(struct qib_pportdata *ppd) @@ -450,6 +455,9 @@ int qib_setup_sdma(struct qib_pportdata *ppd) ppd->sdma_descq_removed = 0; ppd->sdma_descq_added = 0; + ppd->sdma_intrequest = 0; + INIT_LIST_HEAD(&ppd->sdma_userpending); + INIT_LIST_HEAD(&ppd->sdma_activelist); tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task, @@ -706,6 +714,62 @@ unlock: return ret; } +/* + * sdma_lock should be acquired before calling this routine + */ +void dump_sdma_state(struct qib_pportdata *ppd) +{ + struct qib_sdma_desc *descq; + struct qib_sdma_txreq *txp, *txpnext; + __le64 *descqp; + u64 desc[2]; + u64 addr; + u16 gen, dwlen, dwoffset; + u16 head, tail, cnt; + + head = ppd->sdma_descq_head; + tail = ppd->sdma_descq_tail; + cnt = qib_sdma_descq_freecnt(ppd); + descq = ppd->sdma_descq; + + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA ppd->sdma_descq_head: %u\n", head); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA ppd->sdma_descq_tail: %u\n", tail); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sdma_descq_freecnt: %u\n", cnt); + + /* print info for each entry in the descriptor queue */ + while (head != tail) { + char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 }; + + descqp = &descq[head].qw[0]; + desc[0] = le64_to_cpu(descqp[0]); + desc[1] = le64_to_cpu(descqp[1]); + flags[0] = (desc[0] & 1<<15) ? 'I' : '-'; + flags[1] = (desc[0] & 1<<14) ? 'L' : 'S'; + flags[2] = (desc[0] & 1<<13) ? 'H' : '-'; + flags[3] = (desc[0] & 1<<12) ? 'F' : '-'; + flags[4] = (desc[0] & 1<<11) ? 'L' : '-'; + addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL); + gen = (desc[0] >> 30) & 3ULL; + dwlen = (desc[0] >> 14) & (0x7ffULL << 2); + dwoffset = (desc[0] & 0x7ffULL) << 2; + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n", + head, flags, addr, gen, dwlen, dwoffset); + if (++head == ppd->sdma_descq_cnt) + head = 0; + } + + /* print dma descriptor indices from the TX requests */ + list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist, + list) + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA txp->start_idx: %u txp->next_descq_idx: %u\n", + txp->start_idx, txp->next_descq_idx); +} + void qib_sdma_process_event(struct qib_pportdata *ppd, enum qib_sdma_events event) { diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c index c3ec8efc2ed..d6235931a1b 100644 --- a/drivers/infiniband/hw/qib/qib_srq.c +++ b/drivers/infiniband/hw/qib/qib_srq.c @@ -107,6 +107,11 @@ struct ib_srq *qib_create_srq(struct ib_pd *ibpd, u32 sz; struct ib_srq *ret; + if (srq_init_attr->srq_type != IB_SRQT_BASIC) { + ret = ERR_PTR(-ENOSYS); + goto done; + } + if (srq_init_attr->attr.max_sge == 0 || srq_init_attr->attr.max_sge > ib_qib_max_srq_sges || srq_init_attr->attr.max_wr == 0 || diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index d50a33fe8bb..3c8e4e3caca 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -33,41 +34,7 @@ #include <linux/ctype.h> #include "qib.h" - -/** - * qib_parse_ushort - parse an unsigned short value in an arbitrary base - * @str: the string containing the number - * @valp: where to put the result - * - * Returns the number of bytes consumed, or negative value on error. - */ -static int qib_parse_ushort(const char *str, unsigned short *valp) -{ - unsigned long val; - char *end; - int ret; - - if (!isdigit(str[0])) { - ret = -EINVAL; - goto bail; - } - - val = simple_strtoul(str, &end, 0); - - if (val > 0xffff) { - ret = -EINVAL; - goto bail; - } - - *valp = val; - - ret = end + 1 - str; - if (ret == 0) - ret = -EINVAL; - -bail: - return ret; -} +#include "qib_mad.h" /* start of per-port functions */ /* @@ -90,7 +57,11 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, int ret; u16 val; - ret = qib_parse_ushort(buf, &val); + ret = kstrtou16(buf, 0, &val); + if (ret) { + qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + return ret; + } /* * Set the "intentional" heartbeat enable per either of @@ -99,10 +70,7 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, * because entering loopback mode overrides it and automatically * disables heartbeat. */ - if (ret >= 0) - ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val); - if (ret < 0) - qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val); return ret < 0 ? ret : count; } @@ -126,12 +94,14 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf, int ret; u16 val; - ret = qib_parse_ushort(buf, &val); - if (ret > 0) - qib_set_led_override(ppd, val); - else + ret = kstrtou16(buf, 0, &val); + if (ret) { qib_dev_err(dd, "attempt to set invalid LED override\n"); - return ret < 0 ? ret : count; + return ret; + } + + qib_set_led_override(ppd, val); + return count; } static ssize_t show_status(struct qib_pportdata *ppd, char *buf) @@ -150,7 +120,7 @@ static ssize_t show_status(struct qib_pportdata *ppd, char *buf) * For userland compatibility, these offsets must remain fixed. * They are strings for QIB_STATUS_* */ -static const char *qib_status_str[] = { +static const char * const qib_status_str[] = { "Initted", "", "", @@ -231,6 +201,98 @@ static struct attribute *port_default_attributes[] = { NULL }; +/* + * Start of per-port congestion control structures and support code + */ + +/* + * Congestion control table size followed by table entries + */ +static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) +{ + int ret; + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_cc_kobj); + + if (!qib_cc_table_size || !ppd->ccti_entries_shadow) + return -EINVAL; + + ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow) + + sizeof(__be16); + + if (pos > ret) + return -EINVAL; + + if (count > ret - pos) + count = ret - pos; + + if (!count) + return count; + + spin_lock(&ppd->cc_shadow_lock); + memcpy(buf, ppd->ccti_entries_shadow, count); + spin_unlock(&ppd->cc_shadow_lock); + + return count; +} + +static void qib_port_release(struct kobject *kobj) +{ + /* nothing to do since memory is freed by qib_free_devdata() */ +} + +static struct kobj_type qib_port_cc_ktype = { + .release = qib_port_release, +}; + +static struct bin_attribute cc_table_bin_attr = { + .attr = {.name = "cc_table_bin", .mode = 0444}, + .read = read_cc_table_bin, + .size = PAGE_SIZE, +}; + +/* + * Congestion settings: port control, control map and an array of 16 + * entries for the congestion entries - increase, timer, event log + * trigger threshold and the minimum injection rate delay. + */ +static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) +{ + int ret; + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_cc_kobj); + + if (!qib_cc_table_size || !ppd->congestion_entries_shadow) + return -EINVAL; + + ret = sizeof(struct ib_cc_congestion_setting_attr_shadow); + + if (pos > ret) + return -EINVAL; + if (count > ret - pos) + count = ret - pos; + + if (!count) + return count; + + spin_lock(&ppd->cc_shadow_lock); + memcpy(buf, ppd->congestion_entries_shadow, count); + spin_unlock(&ppd->cc_shadow_lock); + + return count; +} + +static struct bin_attribute cc_setting_bin_attr = { + .attr = {.name = "cc_settings_bin", .mode = 0444}, + .read = read_cc_setting_bin, + .size = PAGE_SIZE, +}; + + static ssize_t qib_portattr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -253,10 +315,6 @@ static ssize_t qib_portattr_store(struct kobject *kobj, return pattr->store(ppd, buf, len); } -static void qib_port_release(struct kobject *kobj) -{ - /* nothing to do since memory is freed by qib_free_devdata() */ -} static const struct sysfs_ops qib_port_ops = { .show = qib_portattr_show, @@ -411,12 +469,12 @@ static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, diagc_kobj); struct qib_ibport *qibp = &ppd->ibport_data; - char *endp; - long val = simple_strtol(buf, &endp, 0); - - if (val < 0 || endp == buf) - return -EINVAL; + u32 val; + int ret; + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; *(u32 *)((char *) qibp + dattr->counter) = val; return size; } @@ -503,8 +561,22 @@ static ssize_t show_nctxts(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of user ports (contexts) available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts - - dd->first_user_ctxt); + /* The calculation below deals with a special case where + * cfgctxts is set to 1 on a single-port board. */ + return scnprintf(buf, PAGE_SIZE, "%u\n", + (dd->first_user_ctxt > dd->cfgctxts) ? 0 : + (dd->cfgctxts - dd->first_user_ctxt)); +} + +static ssize_t show_nfreectxts(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + + /* Return the number of free user ports (contexts) available. */ + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); } static ssize_t show_serial(struct device *device, @@ -604,6 +676,7 @@ static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL); +static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL); static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); @@ -617,6 +690,7 @@ static struct device_attribute *qib_attributes[] = { &dev_attr_board_id, &dev_attr_version, &dev_attr_nctxts, + &dev_attr_nfreectxts, &dev_attr_serial, &dev_attr_boardversion, &dev_attr_logged_errors, @@ -633,8 +707,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, int ret; if (!port_num || port_num > dd->num_pports) { - qib_dev_err(dd, "Skipping infiniband class with " - "invalid port %u\n", port_num); + qib_dev_err(dd, + "Skipping infiniband class with invalid port %u\n", + port_num); ret = -ENODEV; goto bail; } @@ -643,8 +718,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj, "linkcontrol"); if (ret) { - qib_dev_err(dd, "Skipping linkcontrol sysfs info, " - "(err %d) port %u\n", ret, port_num); + qib_dev_err(dd, + "Skipping linkcontrol sysfs info, (err %d) port %u\n", + ret, port_num); goto bail; } kobject_uevent(&ppd->pport_kobj, KOBJ_ADD); @@ -652,26 +728,70 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj, "sl2vl"); if (ret) { - qib_dev_err(dd, "Skipping sl2vl sysfs info, " - "(err %d) port %u\n", ret, port_num); - goto bail_sl; + qib_dev_err(dd, + "Skipping sl2vl sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_link; } kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj, "diag_counters"); if (ret) { - qib_dev_err(dd, "Skipping diag_counters sysfs info, " - "(err %d) port %u\n", ret, port_num); - goto bail_diagc; + qib_dev_err(dd, + "Skipping diag_counters sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_sl; } kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD); + if (!qib_cc_table_size || !ppd->congestion_entries_shadow) + return 0; + + ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype, + kobj, "CCMgtA"); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_diagc; + } + + kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); + + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, + &cc_setting_bin_attr); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control setting sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_cc; + } + + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, + &cc_table_bin_attr); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control table sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_cc_entry_bin; + } + + qib_devinfo(dd->pcidev, + "IB%u: Congestion Control Agent enabled for port %d\n", + dd->unit, port_num); + return 0; +bail_cc_entry_bin: + sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr); +bail_cc: + kobject_put(&ppd->pport_cc_kobj); bail_diagc: - kobject_put(&ppd->sl2vl_kobj); + kobject_put(&ppd->diagc_kobj); bail_sl: + kobject_put(&ppd->sl2vl_kobj); +bail_link: kobject_put(&ppd->pport_kobj); bail: return ret; @@ -688,10 +808,14 @@ int qib_verbs_register_sysfs(struct qib_devdata *dd) for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) { ret = device_create_file(&dev->dev, qib_attributes[i]); if (ret) - return ret; + goto bail; } return 0; +bail: + for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) + device_remove_file(&dev->dev, qib_attributes[i]); + return ret; } /* @@ -704,7 +828,15 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd) for (i = 0; i < dd->num_pports; i++) { ppd = &dd->pport[i]; - kobject_put(&ppd->pport_kobj); + if (qib_cc_table_size && + ppd->congestion_entries_shadow) { + sysfs_remove_bin_file(&ppd->pport_cc_kobj, + &cc_setting_bin_attr); + sysfs_remove_bin_file(&ppd->pport_cc_kobj, + &cc_table_bin_attr); + kobject_put(&ppd->pport_cc_kobj); + } kobject_put(&ppd->sl2vl_kobj); + kobject_put(&ppd->pport_kobj); } } diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c index 6f31ca5039d..647f7beb1b0 100644 --- a/drivers/infiniband/hw/qib/qib_twsi.c +++ b/drivers/infiniband/hw/qib/qib_twsi.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -41,7 +42,7 @@ * QLogic_IB "Two Wire Serial Interface" driver. * Originally written for a not-quite-i2c serial eeprom, which is * still used on some supported boards. Later boards have added a - * variety of other uses, most board-specific, so teh bit-boffing + * variety of other uses, most board-specific, so the bit-boffing * part has been split off to this file, while the other parts * have been moved to chip-specific files. * @@ -449,8 +450,9 @@ int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr, goto failed_write; ret = qib_twsi_wr(dd, addr, 0); if (ret) { - qib_dev_err(dd, "Failed to write interface" - " write addr %02X\n", addr); + qib_dev_err(dd, + "Failed to write interface write addr %02X\n", + addr); goto failed_write; } } diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index 7f36454c225..31d3561400a 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -36,6 +36,7 @@ #include <linux/delay.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> +#include <linux/moduleparam.h> #include "qib.h" @@ -294,6 +295,7 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum, nbufs = last - first + 1; /* number in range to check */ if (dd->upd_pio_shadow) { +update_shadow: /* * Minor optimization. If we had no buffers on last call, * start out by doing the update; continue and do scan even @@ -303,37 +305,39 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum, updated++; } i = first; -rescan: /* * While test_and_set_bit() is atomic, we do that and then the * change_bit(), and the pair is not. See if this is the cause * of the remaining armlaunch errors. */ spin_lock_irqsave(&dd->pioavail_lock, flags); + if (dd->last_pio >= first && dd->last_pio <= last) + i = dd->last_pio + 1; + if (!first) + /* adjust to min possible */ + nbufs = last - dd->min_kernel_pio + 1; for (j = 0; j < nbufs; j++, i++) { if (i > last) - i = first; + i = !first ? dd->min_kernel_pio : first; if (__test_and_set_bit((2 * i) + 1, shadow)) continue; /* flip generation bit */ __change_bit(2 * i, shadow); /* remember that the buffer can be written to now */ __set_bit(i, dd->pio_writing); + if (!first && first != last) /* first == last on VL15, avoid */ + dd->last_pio = i; break; } spin_unlock_irqrestore(&dd->pioavail_lock, flags); if (j == nbufs) { - if (!updated) { + if (!updated) /* * First time through; shadow exhausted, but may be * buffers available, try an update and then rescan. */ - update_send_bufs(dd); - updated++; - i = first; - goto rescan; - } + goto update_shadow; no_send_bufs(dd); buf = NULL; } else { @@ -421,14 +425,20 @@ void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start, __clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT + start, dd->pioavailshadow); __set_bit(start, dd->pioavailkernel); + if ((start >> 1) < dd->min_kernel_pio) + dd->min_kernel_pio = start >> 1; } else { __set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT, dd->pioavailshadow); __clear_bit(start, dd->pioavailkernel); + if ((start >> 1) > dd->min_kernel_pio) + dd->min_kernel_pio = start >> 1; } start += 2; } + if (dd->min_kernel_pio > 0 && dd->last_pio < dd->min_kernel_pio - 1) + dd->last_pio = dd->min_kernel_pio - 1; spin_unlock_irqrestore(&dd->pioavail_lock, flags); dd->f_txchk_change(dd, ostart, len, avail, rcd); diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 32ccf3c824c..aa3a8035bb6 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -51,7 +51,7 @@ int qib_make_uc_req(struct qib_qp *qp) u32 hwords; u32 bth0; u32 len; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; int ret = 0; spin_lock_irqsave(&qp->s_lock, flags); @@ -72,9 +72,9 @@ int qib_make_uc_req(struct qib_qp *qp) goto done; } - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; @@ -243,13 +243,12 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, int has_grh, void *data, u32 tlen, struct qib_qp *qp) { struct qib_other_headers *ohdr; - unsigned long flags; u32 opcode; u32 hdrsize; u32 psn; u32 pad; struct ib_wc wc; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; struct ib_reth *reth; int ret; @@ -263,14 +262,11 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, } opcode = be32_to_cpu(ohdr->bth[0]); - spin_lock_irqsave(&qp->s_lock, flags); if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) - goto sunlock; - spin_unlock_irqrestore(&qp->s_lock, flags); + return; psn = be32_to_cpu(ohdr->bth[2]); opcode >>= 24; - memset(&wc, 0, sizeof wc); /* Compare the PSN verses the expected PSN. */ if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) { @@ -285,11 +281,7 @@ inv: set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; } else - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); qp->r_state = OP(SEND_LAST); switch (opcode) { case OP(SEND_FIRST): @@ -370,7 +362,7 @@ send_first: } qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) - goto send_last; + goto no_immediate_data; else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) goto send_last_imm; /* FALLTHROUGH */ @@ -389,8 +381,11 @@ send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; - /* FALLTHROUGH */ + goto send_last; case OP(SEND_LAST): +no_immediate_data: + wc.ex.imm_data = 0; + wc.wc_flags = 0; send_last: /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; @@ -404,20 +399,20 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto rewind; wc.opcode = IB_WC_RECV; -last_imm: qib_copy_sge(&qp->r_sge, data, tlen, 0); - while (qp->s_rdma_read_sge.num_sge) { - atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); +last_imm: wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; + /* zero fields that are N/A */ + wc.vendor_err = 0; + wc.pkey_index = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -489,13 +484,7 @@ rdma_last_imm: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) - while (qp->s_rdma_read_sge.num_sge) { - atomic_dec(&qp->s_rdma_read_sge.sge.mr-> - refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); else { ret = qib_get_rwqe(qp, 1); if (ret < 0) @@ -505,6 +494,8 @@ rdma_last_imm: } wc.byte_len = qp->r_len; wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + qib_copy_sge(&qp->r_sge, data, tlen, 1); + qib_put_ss(&qp->r_sge); goto last_imm; case OP(RDMA_WRITE_LAST): @@ -520,11 +511,7 @@ rdma_last: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; qib_copy_sge(&qp->r_sge, data, tlen, 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); break; default: @@ -546,6 +533,4 @@ op_err: qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return; -sunlock: - spin_unlock_irqrestore(&qp->s_lock, flags); } diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index e1b3da2a1f8..aaf7039f8ed 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -57,13 +57,20 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) struct qib_sge *sge; struct ib_wc wc; u32 length; + enum ib_qp_type sqptype, dqptype; qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); if (!qp) { ibp->n_pkt_drops++; return; } - if (qp->ibqp.qp_type != sqp->ibqp.qp_type || + + sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ? + IB_QPT_UD : sqp->ibqp.qp_type; + dqptype = qp->ibqp.qp_type == IB_QPT_GSI ? + IB_QPT_UD : qp->ibqp.qp_type; + + if (dqptype != sqptype || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { ibp->n_pkt_drops++; goto drop; @@ -116,7 +123,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) } /* - * A GRH is expected to preceed the data even if not + * A GRH is expected to precede the data even if not * present on the wire. */ length = swqe->length; @@ -194,11 +201,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) } length -= len; } - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) goto bail_unlock; wc.wr_id = qp->r_wr_id; @@ -277,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp) ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { if (ah_attr->dlid != QIB_PERMISSIVE_LID) - ibp->n_multicast_xmit++; + this_cpu_inc(ibp->pmastats->n_multicast_xmit); else - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } else { - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); if (unlikely(lid == ppd->lid)) { /* @@ -321,11 +324,11 @@ int qib_make_ud_req(struct qib_qp *qp) if (ah_attr->ah_flags & IB_AH_GRH) { /* Header size in 32-bit words. */ - qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh, + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, &ah_attr->grh, qp->s_hdrwords, nwords); lrh0 = QIB_LRH_GRH; - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* * Don't worry about sending to locally attached multicast * QPs. It is unspecified by the spec. what happens. @@ -333,7 +336,7 @@ int qib_make_ud_req(struct qib_qp *qp) } else { /* Header size in 32-bit words. */ lrh0 = QIB_LRH_BTH; - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_hdrwords++; @@ -346,15 +349,15 @@ int qib_make_ud_req(struct qib_qp *qp) lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ else lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12; - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ + qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); lid = ppd->lid; if (lid) { lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); - qp->s_hdr.lrh[3] = cpu_to_be16(lid); + qp->s_hdr->lrh[3] = cpu_to_be16(lid); } else - qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; + qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE; if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; bth0 |= extra_bytes << 20; @@ -445,13 +448,14 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, qkey = be32_to_cpu(ohdr->u.ud.deth[0]); src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK; - /* Get the number of bytes the message was padded by. */ + /* + * Get the number of bytes the message was padded by + * and drop incomplete packets. + */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - if (unlikely(tlen < (hdrsize + pad + 4))) { - /* Drop incomplete packets. */ - ibp->n_pkt_drops++; - goto bail; - } + if (unlikely(tlen < (hdrsize + pad + 4))) + goto drop; + tlen -= hdrsize + pad + 4; /* @@ -460,10 +464,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, */ if (qp->ibqp.qp_num) { if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE || - hdr->lrh[3] == IB_LID_PERMISSIVE)) { - ibp->n_pkt_drops++; - goto bail; - } + hdr->lrh[3] == IB_LID_PERMISSIVE)) + goto drop; if (qp->ibqp.qp_num > 1) { u16 pkey1, pkey2; @@ -476,7 +478,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, 0xF, src_qp, qp->ibqp.qp_num, hdr->lrh[3], hdr->lrh[1]); - goto bail; + return; } } if (unlikely(qkey != qp->qkey)) { @@ -484,30 +486,24 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, src_qp, qp->ibqp.qp_num, hdr->lrh[3], hdr->lrh[1]); - goto bail; + return; } /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && (tlen != 256 || - (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) { - ibp->n_pkt_drops++; - goto bail; - } + (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) + goto drop; } else { struct ib_smp *smp; /* Drop invalid MAD packets (see 13.5.3.1). */ - if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15) { - ibp->n_pkt_drops++; - goto bail; - } + if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15) + goto drop; smp = (struct ib_smp *) data; if ((hdr->lrh[1] == IB_LID_PERMISSIVE || hdr->lrh[3] == IB_LID_PERMISSIVE) && - smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - ibp->n_pkt_drops++; - goto bail; - } + smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + goto drop; } /* @@ -519,17 +515,15 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - hdrsize += sizeof(u32); + tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; - } else { - ibp->n_pkt_drops++; - goto bail; - } + } else + goto drop; /* - * A GRH is expected to preceed the data even if not + * A GRH is expected to precede the data even if not * present on the wire. */ wc.byte_len = tlen + sizeof(struct ib_grh); @@ -556,8 +550,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, /* Silently drop packets which are too big. */ if (unlikely(wc.byte_len > qp->r_len)) { qp->r_flags |= QIB_R_REUSE_SGE; - ibp->n_pkt_drops++; - return; + goto drop; } if (has_grh) { qib_copy_sge(&qp->r_sge, &hdr->u.l.grh, @@ -566,11 +559,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, } else qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) return; wc.wr_id = qp->r_wr_id; @@ -594,5 +583,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & cpu_to_be32(IB_BTH_SOLICITED)) != 0); -bail:; + return; + +drop: + ibp->n_pkt_drops++; } diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index d7a26c1d4f3..2bc1d2b9629 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -51,8 +51,8 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages, /* * Call with current->mm->mmap_sem held. */ -static int __get_user_pages(unsigned long start_page, size_t num_pages, - struct page **p, struct vm_area_struct **vma) +static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, + struct page **p, struct vm_area_struct **vma) { unsigned long lock_limit; size_t got; @@ -74,7 +74,7 @@ static int __get_user_pages(unsigned long start_page, size_t num_pages, goto bail_release; } - current->mm->locked_vm += num_pages; + current->mm->pinned_vm += num_pages; ret = 0; goto bail; @@ -136,7 +136,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages, down_write(¤t->mm->mmap_sem); - ret = __get_user_pages(start_page, num_pages, p, NULL); + ret = __qib_get_user_pages(start_page, num_pages, p, NULL); up_write(¤t->mm->mmap_sem); @@ -151,7 +151,7 @@ void qib_release_user_pages(struct page **p, size_t num_pages) __qib_release_user_pages(p, num_pages, 1); if (current->mm) { - current->mm->locked_vm -= num_pages; + current->mm->pinned_vm -= num_pages; up_write(¤t->mm->mmap_sem); } } diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 4c19e06b5e8..d2806cae234 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -52,21 +52,48 @@ /* attempt to drain the queue for 5secs */ #define QIB_USER_SDMA_DRAIN_TIMEOUT 500 +/* + * track how many times a process open this driver. + */ +static struct rb_root qib_user_sdma_rb_root = RB_ROOT; + +struct qib_user_sdma_rb_node { + struct rb_node node; + int refcount; + pid_t pid; +}; + struct qib_user_sdma_pkt { - u8 naddr; /* dimension of addr (1..3) ... */ + struct list_head list; /* list element */ + + u8 tiddma; /* if this is NEW tid-sdma */ + u8 largepkt; /* this is large pkt from kmalloc */ + u16 frag_size; /* frag size used by PSM */ + u16 index; /* last header index or push index */ + u16 naddr; /* dimension of addr (1..3) ... */ + u16 addrlimit; /* addr array size */ + u16 tidsmidx; /* current tidsm index */ + u16 tidsmcount; /* tidsm array item count */ + u16 payload_size; /* payload size so far for header */ + u32 bytes_togo; /* bytes for processing */ u32 counter; /* sdma pkts queued counter for this entry */ + struct qib_tid_session_member *tidsm; /* tid session member array */ + struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */ u64 added; /* global descq number of entries */ struct { - u32 offset; /* offset for kvaddr, addr */ - u32 length; /* length in page */ - u8 put_page; /* should we put_page? */ - u8 dma_mapped; /* is page dma_mapped? */ + u16 offset; /* offset for kvaddr, addr */ + u16 length; /* length in page */ + u16 first_desc; /* first desc */ + u16 last_desc; /* last desc */ + u16 put_page; /* should we put_page? */ + u16 dma_mapped; /* is page dma_mapped? */ + u16 dma_length; /* for dma_unmap_page() */ + u16 padding; struct page *page; /* may be NULL (coherent mem) */ void *kvaddr; /* FIXME: only for pio hack */ dma_addr_t addr; } addr[4]; /* max pages, any more and we coalesce */ - struct list_head list; /* list element */ }; struct qib_user_sdma_queue { @@ -77,6 +104,12 @@ struct qib_user_sdma_queue { */ struct list_head sent; + /* + * Because above list will be accessed by both process and + * signal handler, we need a spinlock for it. + */ + spinlock_t sent_lock ____cacheline_aligned_in_smp; + /* headers with expected length are allocated from here... */ char header_cache_name[64]; struct dma_pool *header_cache; @@ -88,27 +121,83 @@ struct qib_user_sdma_queue { /* as packets go on the queued queue, they are counted... */ u32 counter; u32 sent_counter; + /* pending packets, not sending yet */ + u32 num_pending; + /* sending packets, not complete yet */ + u32 num_sending; + /* global descq number of entry of last sending packet */ + u64 added; /* dma page table */ struct rb_root dma_pages_root; + struct qib_user_sdma_rb_node *sdma_rb_node; + /* protect everything above... */ struct mutex lock; }; +static struct qib_user_sdma_rb_node * +qib_user_sdma_rb_search(struct rb_root *root, pid_t pid) +{ + struct qib_user_sdma_rb_node *sdma_rb_node; + struct rb_node *node = root->rb_node; + + while (node) { + sdma_rb_node = container_of(node, + struct qib_user_sdma_rb_node, node); + if (pid < sdma_rb_node->pid) + node = node->rb_left; + else if (pid > sdma_rb_node->pid) + node = node->rb_right; + else + return sdma_rb_node; + } + return NULL; +} + +static int +qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new) +{ + struct rb_node **node = &(root->rb_node); + struct rb_node *parent = NULL; + struct qib_user_sdma_rb_node *got; + + while (*node) { + got = container_of(*node, struct qib_user_sdma_rb_node, node); + parent = *node; + if (new->pid < got->pid) + node = &((*node)->rb_left); + else if (new->pid > got->pid) + node = &((*node)->rb_right); + else + return 0; + } + + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, root); + return 1; +} + struct qib_user_sdma_queue * qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) { struct qib_user_sdma_queue *pq = kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL); + struct qib_user_sdma_rb_node *sdma_rb_node; if (!pq) goto done; pq->counter = 0; pq->sent_counter = 0; - INIT_LIST_HEAD(&pq->sent); + pq->num_pending = 0; + pq->num_sending = 0; + pq->added = 0; + pq->sdma_rb_node = NULL; + INIT_LIST_HEAD(&pq->sent); + spin_lock_init(&pq->sent_lock); mutex_init(&pq->lock); snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name), @@ -131,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) pq->dma_pages_root = RB_ROOT; + sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root, + current->pid); + if (sdma_rb_node) { + sdma_rb_node->refcount++; + } else { + int ret; + sdma_rb_node = kmalloc(sizeof( + struct qib_user_sdma_rb_node), GFP_KERNEL); + if (!sdma_rb_node) + goto err_rb; + + sdma_rb_node->refcount = 1; + sdma_rb_node->pid = current->pid; + + ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, + sdma_rb_node); + BUG_ON(ret == 0); + } + pq->sdma_rb_node = sdma_rb_node; + goto done; +err_rb: + dma_pool_destroy(pq->header_cache); err_slab: kmem_cache_destroy(pq->pkt_slab); err_kfree: @@ -144,34 +255,310 @@ done: } static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, - int i, size_t offset, size_t len, - int put_page, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) + int i, u16 offset, u16 len, + u16 first_desc, u16 last_desc, + u16 put_page, u16 dma_mapped, + struct page *page, void *kvaddr, + dma_addr_t dma_addr, u16 dma_length) { pkt->addr[i].offset = offset; pkt->addr[i].length = len; + pkt->addr[i].first_desc = first_desc; + pkt->addr[i].last_desc = last_desc; pkt->addr[i].put_page = put_page; pkt->addr[i].dma_mapped = dma_mapped; pkt->addr[i].page = page; pkt->addr[i].kvaddr = kvaddr; pkt->addr[i].addr = dma_addr; + pkt->addr[i].dma_length = dma_length; +} + +static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, + size_t len, dma_addr_t *dma_addr) +{ + void *hdr; + + if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH) + hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL, + dma_addr); + else + hdr = NULL; + + if (!hdr) { + hdr = kmalloc(len, GFP_KERNEL); + if (!hdr) + return NULL; + + *dma_addr = 0; + } + + return hdr; } -static void qib_user_sdma_init_header(struct qib_user_sdma_pkt *pkt, - u32 counter, size_t offset, - size_t len, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) +static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + struct page *page, u16 put, + u16 offset, u16 len, void *kvaddr) { - pkt->naddr = 1; - pkt->counter = counter; - qib_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page, - kvaddr, dma_addr); + __le16 *pbc16; + void *pbcvaddr; + struct qib_message_header *hdr; + u16 newlen, pbclen, lastdesc, dma_mapped; + u32 vcto; + union qib_seqnum seqnum; + dma_addr_t pbcdaddr; + dma_addr_t dma_addr = + dma_map_page(&dd->pcidev->dev, + page, offset, len, DMA_TO_DEVICE); + int ret = 0; + + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + /* + * dma mapping error, pkt has not managed + * this page yet, return the page here so + * the caller can ignore this page. + */ + if (put) { + put_page(page); + } else { + /* coalesce case */ + kunmap(page); + __free_page(page); + } + ret = -ENOMEM; + goto done; + } + offset = 0; + dma_mapped = 1; + + +next_fragment: + + /* + * In tid-sdma, the transfer length is restricted by + * receiver side current tid page length. + */ + if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length) + newlen = pkt->tidsm[pkt->tidsmidx].length; + else + newlen = len; + + /* + * Then the transfer length is restricted by MTU. + * the last descriptor flag is determined by: + * 1. the current packet is at frag size length. + * 2. the current tid page is done if tid-sdma. + * 3. there is no more byte togo if sdma. + */ + lastdesc = 0; + if ((pkt->payload_size + newlen) >= pkt->frag_size) { + newlen = pkt->frag_size - pkt->payload_size; + lastdesc = 1; + } else if (pkt->tiddma) { + if (newlen == pkt->tidsm[pkt->tidsmidx].length) + lastdesc = 1; + } else { + if (newlen == pkt->bytes_togo) + lastdesc = 1; + } + + /* fill the next fragment in this page */ + qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */ + offset, newlen, /* offset, len */ + 0, lastdesc, /* first last desc */ + put, dma_mapped, /* put page, dma mapped */ + page, kvaddr, /* struct page, virt addr */ + dma_addr, len); /* dma addr, dma length */ + pkt->bytes_togo -= newlen; + pkt->payload_size += newlen; + pkt->naddr++; + if (pkt->naddr == pkt->addrlimit) { + ret = -EFAULT; + goto done; + } + + /* If there is no more byte togo. (lastdesc==1) */ + if (pkt->bytes_togo == 0) { + /* The packet is done, header is not dma mapped yet. + * it should be from kmalloc */ + if (!pkt->addr[pkt->index].addr) { + pkt->addr[pkt->index].addr = + dma_map_single(&dd->pcidev->dev, + pkt->addr[pkt->index].kvaddr, + pkt->addr[pkt->index].dma_length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + pkt->addr[pkt->index].addr)) { + ret = -ENOMEM; + goto done; + } + pkt->addr[pkt->index].dma_mapped = 1; + } + + goto done; + } + + /* If tid-sdma, advance tid info. */ + if (pkt->tiddma) { + pkt->tidsm[pkt->tidsmidx].length -= newlen; + if (pkt->tidsm[pkt->tidsmidx].length) { + pkt->tidsm[pkt->tidsmidx].offset += newlen; + } else { + pkt->tidsmidx++; + if (pkt->tidsmidx == pkt->tidsmcount) { + ret = -EFAULT; + goto done; + } + } + } + + /* + * If this is NOT the last descriptor. (newlen==len) + * the current packet is not done yet, but the current + * send side page is done. + */ + if (lastdesc == 0) + goto done; + + /* + * If running this driver under PSM with message size + * fitting into one transfer unit, it is not possible + * to pass this line. otherwise, it is a buggggg. + */ + + /* + * Since the current packet is done, and there are more + * bytes togo, we need to create a new sdma header, copying + * from previous sdma header and modify both. + */ + pbclen = pkt->addr[pkt->index].length; + pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr); + if (!pbcvaddr) { + ret = -ENOMEM; + goto done; + } + /* Copy the previous sdma header to new sdma header */ + pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr; + memcpy(pbcvaddr, pbc16, pbclen); + + /* Modify the previous sdma header */ + hdr = (struct qib_message_header *)&pbc16[4]; + + /* New pbc length */ + pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2)); + + /* New packet length */ + hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0])); + + if (pkt->tiddma) { + /* turn on the header suppression */ + hdr->iph.pkt_flags = + cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2); + /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */ + hdr->flags &= ~(0x04|0x20); + } else { + /* turn off extra bytes: 20-21 bits */ + hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF); + /* turn off ACK_REQ: 0x04 */ + hdr->flags &= ~(0x04); + } + + /* New kdeth checksum */ + vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset); + hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH + + be16_to_cpu(hdr->lrh[2]) - + ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) - + le16_to_cpu(hdr->iph.pkt_flags)); + + /* The packet is done, header is not dma mapped yet. + * it should be from kmalloc */ + if (!pkt->addr[pkt->index].addr) { + pkt->addr[pkt->index].addr = + dma_map_single(&dd->pcidev->dev, + pkt->addr[pkt->index].kvaddr, + pkt->addr[pkt->index].dma_length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + pkt->addr[pkt->index].addr)) { + ret = -ENOMEM; + goto done; + } + pkt->addr[pkt->index].dma_mapped = 1; + } + + /* Modify the new sdma header */ + pbc16 = (__le16 *)pbcvaddr; + hdr = (struct qib_message_header *)&pbc16[4]; + + /* New pbc length */ + pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2)); + + /* New packet length */ + hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0])); + + if (pkt->tiddma) { + /* Set new tid and offset for new sdma header */ + hdr->iph.ver_ctxt_tid_offset = cpu_to_le32( + (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) + + (pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) + + (pkt->tidsm[pkt->tidsmidx].offset>>2)); + } else { + /* Middle protocol new packet offset */ + hdr->uwords[2] += pkt->payload_size; + } + + /* New kdeth checksum */ + vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset); + hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH + + be16_to_cpu(hdr->lrh[2]) - + ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) - + le16_to_cpu(hdr->iph.pkt_flags)); + + /* Next sequence number in new sdma header */ + seqnum.val = be32_to_cpu(hdr->bth[2]); + if (pkt->tiddma) + seqnum.seq++; + else + seqnum.pkt++; + hdr->bth[2] = cpu_to_be32(seqnum.val); + + /* Init new sdma header. */ + qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */ + 0, pbclen, /* offset, len */ + 1, 0, /* first last desc */ + 0, 0, /* put page, dma mapped */ + NULL, pbcvaddr, /* struct page, virt addr */ + pbcdaddr, pbclen); /* dma addr, dma length */ + pkt->index = pkt->naddr; + pkt->payload_size = 0; + pkt->naddr++; + if (pkt->naddr == pkt->addrlimit) { + ret = -EFAULT; + goto done; + } + + /* Prepare for next fragment in this page */ + if (newlen != len) { + if (dma_mapped) { + put = 0; + dma_mapped = 0; + page = NULL; + kvaddr = NULL; + } + len -= newlen; + offset += newlen; + + goto next_fragment; + } + +done: + return ret; } /* we've too many pages in the iovec, coalesce to a single page */ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, const struct iovec *iov, unsigned long niov) @@ -182,7 +569,6 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, char *mpage; int i; int len = 0; - dma_addr_t dma_addr; if (!page) { ret = -ENOMEM; @@ -205,17 +591,8 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, len += iov[i].iov_len; } - dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, - DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { - ret = -ENOMEM; - goto free_unmap; - } - - qib_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save, - dma_addr); - pkt->naddr = 2; - + ret = qib_user_sdma_page_to_frags(dd, pq, pkt, + page, 0, 0, len, mpage_save); goto done; free_unmap: @@ -238,16 +615,6 @@ static int qib_user_sdma_num_pages(const struct iovec *iov) return 1 + ((epage - spage) >> PAGE_SHIFT); } -/* - * Truncate length to page boundry. - */ -static int qib_user_sdma_page_length(unsigned long addr, unsigned long len) -{ - const unsigned long offset = addr & ~PAGE_MASK; - - return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len; -} - static void qib_user_sdma_free_pkt_frag(struct device *dev, struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, @@ -256,10 +623,11 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, const int i = frag; if (pkt->addr[i].page) { + /* only user data has page */ if (pkt->addr[i].dma_mapped) dma_unmap_page(dev, pkt->addr[i].addr, - pkt->addr[i].length, + pkt->addr[i].dma_length, DMA_TO_DEVICE); if (pkt->addr[i].kvaddr) @@ -269,55 +637,80 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, put_page(pkt->addr[i].page); else __free_page(pkt->addr[i].page); - } else if (pkt->addr[i].kvaddr) - /* free coherent mem from cache... */ - dma_pool_free(pq->header_cache, + } else if (pkt->addr[i].kvaddr) { + /* for headers */ + if (pkt->addr[i].dma_mapped) { + /* from kmalloc & dma mapped */ + dma_unmap_single(dev, + pkt->addr[i].addr, + pkt->addr[i].dma_length, + DMA_TO_DEVICE); + kfree(pkt->addr[i].kvaddr); + } else if (pkt->addr[i].addr) { + /* free coherent mem from cache... */ + dma_pool_free(pq->header_cache, pkt->addr[i].kvaddr, pkt->addr[i].addr); + } else { + /* from kmalloc but not dma mapped */ + kfree(pkt->addr[i].kvaddr); + } + } } /* return number of pages pinned... */ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, unsigned long addr, int tlen, int npages) { - struct page *pages[2]; - int j; - int ret; - - ret = get_user_pages(current, current->mm, addr, - npages, 0, 1, pages, NULL); - - if (ret != npages) { - int i; - - for (i = 0; i < ret; i++) - put_page(pages[i]); - - ret = -ENOMEM; - goto done; - } + struct page *pages[8]; + int i, j; + int ret = 0; - for (j = 0; j < npages; j++) { - /* map the pages... */ - const int flen = qib_user_sdma_page_length(addr, tlen); - dma_addr_t dma_addr = - dma_map_page(&dd->pcidev->dev, - pages[j], 0, flen, DMA_TO_DEVICE); - unsigned long fofs = addr & ~PAGE_MASK; + while (npages) { + if (npages > 8) + j = 8; + else + j = npages; - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + ret = get_user_pages_fast(addr, j, 0, pages); + if (ret != j) { + i = 0; + j = ret; ret = -ENOMEM; - goto done; + goto free_pages; } - qib_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1, - pages[j], kmap(pages[j]), dma_addr); + for (i = 0; i < j; i++) { + /* map the pages... */ + unsigned long fofs = addr & ~PAGE_MASK; + int flen = ((fofs + tlen) > PAGE_SIZE) ? + (PAGE_SIZE - fofs) : tlen; + + ret = qib_user_sdma_page_to_frags(dd, pq, pkt, + pages[i], 1, fofs, flen, NULL); + if (ret < 0) { + /* current page has beed taken + * care of inside above call. + */ + i++; + goto free_pages; + } + + addr += flen; + tlen -= flen; + } - pkt->naddr++; - addr += flen; - tlen -= flen; + npages -= j; } + goto done; + + /* if error, return all pages not managed by pkt */ +free_pages: + while (i < j) + put_page(pages[i++]); + done: return ret; } @@ -335,7 +728,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, const int npages = qib_user_sdma_num_pages(iov + idx); const unsigned long addr = (unsigned long) iov[idx].iov_base; - ret = qib_user_sdma_pin_pages(dd, pkt, addr, + ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr, iov[idx].iov_len, npages); if (ret < 0) goto free_pkt; @@ -344,9 +737,22 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, goto done; free_pkt: - for (idx = 0; idx < pkt->naddr; idx++) + /* we need to ignore the first entry here */ + for (idx = 1; idx < pkt->naddr; idx++) qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx); + /* need to dma unmap the first entry, this is to restore to + * the original state so that caller can free the memory in + * error condition. Caller does not know if dma mapped or not*/ + if (pkt->addr[0].dma_mapped) { + dma_unmap_single(&dd->pcidev->dev, + pkt->addr[0].addr, + pkt->addr[0].dma_length, + DMA_TO_DEVICE); + pkt->addr[0].addr = 0; + pkt->addr[0].dma_mapped = 0; + } + done: return ret; } @@ -359,8 +765,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd, { int ret = 0; - if (npages >= ARRAY_SIZE(pkt->addr)) - ret = qib_user_sdma_coalesce(dd, pkt, iov, niov); + if (pkt->frag_size == pkt->bytes_togo && + npages >= ARRAY_SIZE(pkt->addr)) + ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov); else ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov); @@ -380,8 +787,12 @@ static void qib_user_sdma_free_pkt_list(struct device *dev, for (i = 0; i < pkt->naddr; i++) qib_user_sdma_free_pkt_frag(dev, pq, pkt, i); - kmem_cache_free(pq->pkt_slab, pkt); + if (pkt->largepkt) + kfree(pkt); + else + kmem_cache_free(pq->pkt_slab, pkt); } + INIT_LIST_HEAD(list); } /* @@ -392,63 +803,48 @@ static void qib_user_sdma_free_pkt_list(struct device *dev, * as, if there is an error we clean it... */ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, + struct qib_pportdata *ppd, struct qib_user_sdma_queue *pq, - struct list_head *list, const struct iovec *iov, unsigned long niov, - int maxpkts) + struct list_head *list, + int *maxpkts, int *ndesc) { unsigned long idx = 0; int ret = 0; int npkts = 0; - struct page *page = NULL; __le32 *pbc; dma_addr_t dma_addr; struct qib_user_sdma_pkt *pkt = NULL; size_t len; size_t nw; u32 counter = pq->counter; - int dma_mapped = 0; + u16 frag_size; - while (idx < niov && npkts < maxpkts) { + while (idx < niov && npkts < *maxpkts) { const unsigned long addr = (unsigned long) iov[idx].iov_base; const unsigned long idx_save = idx; unsigned pktnw; unsigned pktnwc; int nfrags = 0; int npages = 0; + int bytes_togo = 0; + int tiddma = 0; int cfur; - dma_mapped = 0; len = iov[idx].iov_len; nw = len >> 2; - page = NULL; - - pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); - if (!pkt) { - ret = -ENOMEM; - goto free_list; - } if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH || len > PAGE_SIZE || len & 3 || addr & 3) { ret = -EINVAL; - goto free_pkt; + goto free_list; } - if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH) - pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL, - &dma_addr); - else - pbc = NULL; - + pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr); if (!pbc) { - page = alloc_page(GFP_KERNEL); - if (!page) { - ret = -ENOMEM; - goto free_pkt; - } - pbc = kmap(page); + ret = -ENOMEM; + goto free_list; } cfur = copy_from_user(pbc, iov[idx].iov_base, len); @@ -473,8 +869,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, * we can verify that the packet is consistent with the * iovec lengths. */ - pktnw = le32_to_cpu(*pbc) & QIB_PBC_LENGTH_MASK; - if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) { + pktnw = le32_to_cpu(*pbc) & 0xFFFF; + if (pktnw < pktnwc) { ret = -EINVAL; goto free_pbc; } @@ -485,17 +881,14 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, const unsigned long faddr = (unsigned long) iov[idx].iov_base; - if (slen & 3 || faddr & 3 || !slen || - slen > PAGE_SIZE) { + if (slen & 3 || faddr & 3 || !slen) { ret = -EINVAL; goto free_pbc; } - npages++; - if ((faddr & PAGE_MASK) != - ((faddr + slen - 1) & PAGE_MASK)) - npages++; + npages += qib_user_sdma_num_pages(&iov[idx]); + bytes_togo += slen; pktnwc += slen >> 2; idx++; nfrags++; @@ -506,48 +899,139 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, goto free_pbc; } - if (page) { - dma_addr = dma_map_page(&dd->pcidev->dev, - page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF; + if (((frag_size ? frag_size : bytes_togo) + len) > + ppd->ibmaxlen) { + ret = -EINVAL; + goto free_pbc; + } + + if (frag_size) { + int pktsize, tidsmsize, n; + + n = npages*((2*PAGE_SIZE/frag_size)+1); + pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n; + + /* + * Determine if this is tid-sdma or just sdma. + */ + tiddma = (((le32_to_cpu(pbc[7])>> + QLOGIC_IB_I_TID_SHIFT)& + QLOGIC_IB_I_TID_MASK) != + QLOGIC_IB_I_TID_MASK); + + if (tiddma) + tidsmsize = iov[idx].iov_len; + else + tidsmsize = 0; + + pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL); + if (!pkt) { ret = -ENOMEM; goto free_pbc; } + pkt->largepkt = 1; + pkt->frag_size = frag_size; + pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); + + if (tiddma) { + char *tidsm = (char *)pkt + pktsize; + cfur = copy_from_user(tidsm, + iov[idx].iov_base, tidsmsize); + if (cfur) { + ret = -EFAULT; + goto free_pkt; + } + pkt->tidsm = + (struct qib_tid_session_member *)tidsm; + pkt->tidsmcount = tidsmsize/ + sizeof(struct qib_tid_session_member); + pkt->tidsmidx = 0; + idx++; + } - dma_mapped = 1; + /* + * pbc 'fill1' field is borrowed to pass frag size, + * we need to clear it after picking frag size, the + * hardware requires this field to be zero. + */ + *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF); + } else { + pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); + if (!pkt) { + ret = -ENOMEM; + goto free_pbc; + } + pkt->largepkt = 0; + pkt->frag_size = bytes_togo; + pkt->addrlimit = ARRAY_SIZE(pkt->addr); } - - qib_user_sdma_init_header(pkt, counter, 0, len, dma_mapped, - page, pbc, dma_addr); + pkt->bytes_togo = bytes_togo; + pkt->payload_size = 0; + pkt->counter = counter; + pkt->tiddma = tiddma; + + /* setup the first header */ + qib_user_sdma_init_frag(pkt, 0, /* index */ + 0, len, /* offset, len */ + 1, 0, /* first last desc */ + 0, 0, /* put page, dma mapped */ + NULL, pbc, /* struct page, virt addr */ + dma_addr, len); /* dma addr, dma length */ + pkt->index = 0; + pkt->naddr = 1; if (nfrags) { ret = qib_user_sdma_init_payload(dd, pq, pkt, iov + idx_save + 1, nfrags, npages); if (ret < 0) - goto free_pbc_dma; + goto free_pkt; + } else { + /* since there is no payload, mark the + * header as the last desc. */ + pkt->addr[0].last_desc = 1; + + if (dma_addr == 0) { + /* + * the header is not dma mapped yet. + * it should be from kmalloc. + */ + dma_addr = dma_map_single(&dd->pcidev->dev, + pbc, len, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + dma_addr)) { + ret = -ENOMEM; + goto free_pkt; + } + pkt->addr[0].addr = dma_addr; + pkt->addr[0].dma_mapped = 1; + } } counter++; npkts++; + pkt->pq = pq; + pkt->index = 0; /* reset index for push on hw */ + *ndesc += pkt->naddr; list_add_tail(&pkt->list, list); } + *maxpkts = npkts; ret = idx; goto done; -free_pbc_dma: - if (dma_mapped) - dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE); +free_pkt: + if (pkt->largepkt) + kfree(pkt); + else + kmem_cache_free(pq->pkt_slab, pkt); free_pbc: - if (page) { - kunmap(page); - __free_page(page); - } else + if (dma_addr) dma_pool_free(pq->header_cache, pbc, dma_addr); -free_pkt: - kmem_cache_free(pq->pkt_slab, pkt); + else + kfree(pbc); free_list: qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list); done: @@ -568,10 +1052,20 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, struct list_head free_list; struct qib_user_sdma_pkt *pkt; struct qib_user_sdma_pkt *pkt_prev; + unsigned long flags; int ret = 0; + if (!pq->num_sending) + return 0; + INIT_LIST_HEAD(&free_list); + /* + * We need this spin lock here because interrupt handler + * might modify this list in qib_user_sdma_send_desc(), also + * we can not get interrupted, otherwise it is a deadlock. + */ + spin_lock_irqsave(&pq->sent_lock, flags); list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) { s64 descd = ppd->sdma_descq_removed - pkt->added; @@ -582,7 +1076,9 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, /* one more packet cleaned */ ret++; + pq->num_sending--; } + spin_unlock_irqrestore(&pq->sent_lock, flags); if (!list_empty(&free_list)) { u32 counter; @@ -603,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq) if (!pq) return; - kmem_cache_destroy(pq->pkt_slab); + pq->sdma_rb_node->refcount--; + if (pq->sdma_rb_node->refcount == 0) { + rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root); + kfree(pq->sdma_rb_node); + } dma_pool_destroy(pq->header_cache); + kmem_cache_destroy(pq->pkt_slab); kfree(pq); } @@ -626,6 +1127,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, struct qib_user_sdma_queue *pq) { struct qib_devdata *dd = ppd->dd; + unsigned long flags; int i; if (!pq) @@ -633,7 +1135,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) { mutex_lock(&pq->lock); - if (list_empty(&pq->sent)) { + if (!pq->num_pending && !pq->num_sending) { mutex_unlock(&pq->lock); break; } @@ -643,29 +1145,44 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, msleep(10); } - if (!list_empty(&pq->sent)) { + if (pq->num_pending || pq->num_sending) { + struct qib_user_sdma_pkt *pkt; + struct qib_user_sdma_pkt *pkt_prev; struct list_head free_list; + mutex_lock(&pq->lock); + spin_lock_irqsave(&ppd->sdma_lock, flags); + /* + * Since we hold sdma_lock, it is safe without sent_lock. + */ + if (pq->num_pending) { + list_for_each_entry_safe(pkt, pkt_prev, + &ppd->sdma_userpending, list) { + if (pkt->pq == pq) { + list_move_tail(&pkt->list, &pq->sent); + pq->num_pending--; + pq->num_sending++; + } + } + } + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + qib_dev_err(dd, "user sdma lists not empty: forcing!\n"); INIT_LIST_HEAD(&free_list); - mutex_lock(&pq->lock); list_splice_init(&pq->sent, &free_list); + pq->num_sending = 0; qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); mutex_unlock(&pq->lock); } } -static inline __le64 qib_sdma_make_desc0(struct qib_pportdata *ppd, +static inline __le64 qib_sdma_make_desc0(u8 gen, u64 addr, u64 dwlen, u64 dwoffset) { - u8 tmpgen; - - tmpgen = ppd->sdma_generation; - return cpu_to_le64(/* SDmaPhyAddr[31:0] */ ((addr & 0xfffffffcULL) << 32) | /* SDmaGeneration[1:0] */ - ((tmpgen & 3ULL) << 30) | + ((gen & 3ULL) << 30) | /* SDmaDwordCount[10:0] */ ((dwlen & 0x7ffULL) << 16) | /* SDmaBufOffset[12:2] */ @@ -691,7 +1208,7 @@ static inline __le64 qib_sdma_make_desc1(u64 addr) static void qib_user_sdma_send_frag(struct qib_pportdata *ppd, struct qib_user_sdma_pkt *pkt, int idx, - unsigned ofs, u16 tail) + unsigned ofs, u16 tail, u8 gen) { const u64 addr = (u64) pkt->addr[idx].addr + (u64) pkt->addr[idx].offset; @@ -701,105 +1218,159 @@ static void qib_user_sdma_send_frag(struct qib_pportdata *ppd, descqp = &ppd->sdma_descq[tail].qw[0]; - descq0 = qib_sdma_make_desc0(ppd, addr, dwlen, ofs); - if (idx == 0) + descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs); + if (pkt->addr[idx].first_desc) descq0 = qib_sdma_make_first_desc0(descq0); - if (idx == pkt->naddr - 1) + if (pkt->addr[idx].last_desc) { descq0 = qib_sdma_make_last_desc0(descq0); + if (ppd->sdma_intrequest) { + descq0 |= cpu_to_le64(1ULL << 15); + ppd->sdma_intrequest = 0; + } + } descqp[0] = descq0; descqp[1] = qib_sdma_make_desc1(addr); } -/* pq->lock must be held, get packets on the wire... */ -static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, - struct qib_user_sdma_queue *pq, - struct list_head *pktlist) +void qib_user_sdma_send_desc(struct qib_pportdata *ppd, + struct list_head *pktlist) { struct qib_devdata *dd = ppd->dd; - int ret = 0; - unsigned long flags; - u16 tail; - u8 generation; - u64 descq_added; - - if (list_empty(pktlist)) - return 0; - - if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) - return -ECOMM; - - spin_lock_irqsave(&ppd->sdma_lock, flags); + u16 nfree, nsent; + u16 tail, tail_c; + u8 gen, gen_c; - /* keep a copy for restoring purposes in case of problems */ - generation = ppd->sdma_generation; - descq_added = ppd->sdma_descq_added; - - if (unlikely(!__qib_sdma_running(ppd))) { - ret = -ECOMM; - goto unlock; - } + nfree = qib_sdma_descq_freecnt(ppd); + if (!nfree) + return; - tail = ppd->sdma_descq_tail; +retry: + nsent = 0; + tail_c = tail = ppd->sdma_descq_tail; + gen_c = gen = ppd->sdma_generation; while (!list_empty(pktlist)) { struct qib_user_sdma_pkt *pkt = list_entry(pktlist->next, struct qib_user_sdma_pkt, list); - int i; + int i, j, c = 0; unsigned ofs = 0; u16 dtail = tail; - if (pkt->naddr > qib_sdma_descq_freecnt(ppd)) - goto unlock_check_tail; - - for (i = 0; i < pkt->naddr; i++) { - qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail); + for (i = pkt->index; i < pkt->naddr && nfree; i++) { + qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen); ofs += pkt->addr[i].length >> 2; if (++tail == ppd->sdma_descq_cnt) { tail = 0; - ++ppd->sdma_generation; + ++gen; + ppd->sdma_intrequest = 1; + } else if (tail == (ppd->sdma_descq_cnt>>1)) { + ppd->sdma_intrequest = 1; } - } - - if ((ofs << 2) > ppd->ibmaxlen) { - ret = -EMSGSIZE; - goto unlock; - } + nfree--; + if (pkt->addr[i].last_desc == 0) + continue; - /* - * If the packet is >= 2KB mtu equivalent, we have to use - * the large buffers, and have to mark each descriptor as - * part of a large buffer packet. - */ - if (ofs > dd->piosize2kmax_dwords) { - for (i = 0; i < pkt->naddr; i++) { - ppd->sdma_descq[dtail].qw[0] |= - cpu_to_le64(1ULL << 14); - if (++dtail == ppd->sdma_descq_cnt) - dtail = 0; + /* + * If the packet is >= 2KB mtu equivalent, we + * have to use the large buffers, and have to + * mark each descriptor as part of a large + * buffer packet. + */ + if (ofs > dd->piosize2kmax_dwords) { + for (j = pkt->index; j <= i; j++) { + ppd->sdma_descq[dtail].qw[0] |= + cpu_to_le64(1ULL << 14); + if (++dtail == ppd->sdma_descq_cnt) + dtail = 0; + } } + c += i + 1 - pkt->index; + pkt->index = i + 1; /* index for next first */ + tail_c = dtail = tail; + gen_c = gen; + ofs = 0; /* reset for next packet */ } - ppd->sdma_descq_added += pkt->naddr; - pkt->added = ppd->sdma_descq_added; - list_move_tail(&pkt->list, &pq->sent); - ret++; + ppd->sdma_descq_added += c; + nsent += c; + if (pkt->index == pkt->naddr) { + pkt->added = ppd->sdma_descq_added; + pkt->pq->added = pkt->added; + pkt->pq->num_pending--; + spin_lock(&pkt->pq->sent_lock); + pkt->pq->num_sending++; + list_move_tail(&pkt->list, &pkt->pq->sent); + spin_unlock(&pkt->pq->sent_lock); + } + if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt) + break; } -unlock_check_tail: /* advance the tail on the chip if necessary */ - if (ppd->sdma_descq_tail != tail) - dd->f_sdma_update_tail(ppd, tail); + if (ppd->sdma_descq_tail != tail_c) { + ppd->sdma_generation = gen_c; + dd->f_sdma_update_tail(ppd, tail_c); + } + + if (nfree && !list_empty(pktlist)) + goto retry; + + return; +} + +/* pq->lock must be held, get packets on the wire... */ +static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq, + struct list_head *pktlist, int count) +{ + unsigned long flags; + + if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) + return -ECOMM; -unlock: - if (unlikely(ret < 0)) { - ppd->sdma_generation = generation; - ppd->sdma_descq_added = descq_added; + /* non-blocking mode */ + if (pq->sdma_rb_node->refcount > 1) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + pq->num_pending += count; + list_splice_tail_init(pktlist, &ppd->sdma_userpending); + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return 0; } - spin_unlock_irqrestore(&ppd->sdma_lock, flags); - return ret; + /* In this case, descriptors from this process are not + * linked to ppd pending queue, interrupt handler + * won't update this process, it is OK to directly + * modify without sdma lock. + */ + + + pq->num_pending += count; + /* + * Blocking mode for single rail process, we must + * release/regain sdma_lock to give other process + * chance to make progress. This is important for + * performance. + */ + do { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + qib_user_sdma_send_desc(ppd, pktlist); + if (!list_empty(pktlist)) + qib_sdma_make_progress(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } while (!list_empty(pktlist)); + + return 0; } int qib_user_sdma_writev(struct qib_ctxtdata *rcd, @@ -821,19 +1392,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, if (!qib_sdma_running(ppd)) goto done_unlock; - if (ppd->sdma_descq_added != ppd->sdma_descq_removed) { + /* if I have packets not complete yet */ + if (pq->added > ppd->sdma_descq_removed) qib_user_sdma_hwqueue_clean(ppd); + /* if I have complete packets to be freed */ + if (pq->num_sending) qib_user_sdma_queue_clean(ppd, pq); - } while (dim) { - const int mxp = 8; + int mxp = 1; + int ndesc = 0; - down_write(¤t->mm->mmap_sem); - ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp); - up_write(¤t->mm->mmap_sem); - - if (ret <= 0) + ret = qib_user_sdma_queue_pkts(dd, ppd, pq, + iov, dim, &list, &mxp, &ndesc); + if (ret < 0) goto done_unlock; else { dim -= ret; @@ -843,24 +1415,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, /* force packets onto the sdma hw queue... */ if (!list_empty(&list)) { /* - * Lazily clean hw queue. the 4 is a guess of about - * how many sdma descriptors a packet will take (it - * doesn't have to be perfect). + * Lazily clean hw queue. */ - if (qib_sdma_descq_freecnt(ppd) < ret * 4) { + if (qib_sdma_descq_freecnt(ppd) < ndesc) { qib_user_sdma_hwqueue_clean(ppd); - qib_user_sdma_queue_clean(ppd, pq); + if (pq->num_sending) + qib_user_sdma_queue_clean(ppd, pq); } - ret = qib_user_sdma_push_pkts(ppd, pq, &list); + ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp); if (ret < 0) goto done_unlock; else { - npkts += ret; - pq->counter += ret; - - if (!list_empty(&list)) - goto done_unlock; + npkts += mxp; + pq->counter += mxp; } } } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 9fab4048885..9bcfbd84298 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -35,14 +35,16 @@ #include <rdma/ib_mad.h> #include <rdma/ib_user_verbs.h> #include <linux/io.h> +#include <linux/module.h> #include <linux/utsname.h> #include <linux/rculist.h> #include <linux/mm.h> +#include <linux/random.h> #include "qib.h" #include "qib_common.h" -static unsigned int ib_qib_qp_table_size = 251; +static unsigned int ib_qib_qp_table_size = 256; module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO); MODULE_PARM_DESC(qp_table_size, "QP table size"); @@ -181,7 +183,7 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release) sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -222,7 +224,7 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release) sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -331,7 +333,8 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length) * @qp: the QP to post on * @wr: the work request to send */ -static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr) +static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, + int *scheduled) { struct qib_swqe *wqe; u32 next; @@ -433,11 +436,17 @@ bail_inval_free: while (j) { struct qib_sge *sge = &wqe->sg_list[--j]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } bail_inval: ret = -EINVAL; bail: + if (!ret && !wr->next && + !qib_sdma_empty( + dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) { + qib_schedule_send(qp); + *scheduled = 1; + } spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -455,9 +464,10 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct qib_qp *qp = to_iqp(ibqp); int err = 0; + int scheduled = 0; for (; wr; wr = wr->next) { - err = qib_post_one_send(qp, wr); + err = qib_post_one_send(qp, wr, &scheduled); if (err) { *bad_wr = wr; goto bail; @@ -465,7 +475,8 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } /* Try to do the send work in the caller's context. */ - qib_do_send(&qp->s_work); + if (!scheduled) + qib_do_send(&qp->s_work); bail: return err; @@ -634,9 +645,11 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) } else goto drop; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - ibp->opstats[opcode & 0x7f].n_bytes += tlen; - ibp->opstats[opcode & 0x7f].n_packets++; + opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; +#ifdef CONFIG_DEBUG_FS + rcd->opstats->stats[opcode].n_bytes += tlen; + rcd->opstats->stats[opcode].n_packets++; +#endif /* Get the destination QP number. */ qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; @@ -649,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid); if (mcast == NULL) goto drop; - ibp->n_multicast_rcv++; + this_cpu_inc(ibp->pmastats->n_multicast_rcv); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); /* @@ -659,17 +672,25 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (atomic_dec_return(&mcast->refcount) <= 1) wake_up(&mcast->wait); } else { - qp = qib_lookup_qpn(ibp, qp_num); - if (!qp) - goto drop; - ibp->n_unicast_rcv++; + if (rcd->lookaside_qp) { + if (rcd->lookaside_qpn != qp_num) { + if (atomic_dec_and_test( + &rcd->lookaside_qp->refcount)) + wake_up( + &rcd->lookaside_qp->wait); + rcd->lookaside_qp = NULL; + } + } + if (!rcd->lookaside_qp) { + qp = qib_lookup_qpn(ibp, qp_num); + if (!qp) + goto drop; + rcd->lookaside_qp = qp; + rcd->lookaside_qpn = qp_num; + } else + qp = rcd->lookaside_qp; + this_cpu_inc(ibp->pmastats->n_unicast_rcv); qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); - /* - * Notify qib_destroy_qp() if it is waiting - * for us to finish. - */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); } return; @@ -903,8 +924,8 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss, __raw_writel(last, piobuf); } -static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, - struct qib_qp *qp, int *retp) +static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, + struct qib_qp *qp) { struct qib_verbs_txreq *tx; unsigned long flags; @@ -916,8 +937,9 @@ static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, struct list_head *l = dev->txreq_free.next; list_del(l); + spin_unlock(&dev->pending_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); tx = list_entry(l, struct qib_verbs_txreq, txreq.list); - *retp = 0; } else { if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK && list_empty(&qp->iowait)) { @@ -925,14 +947,33 @@ static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, qp->s_flags |= QIB_S_WAIT_TX; list_add_tail(&qp->iowait, &dev->txwait); } - tx = NULL; qp->s_flags &= ~QIB_S_BUSY; - *retp = -EBUSY; + spin_unlock(&dev->pending_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); + tx = ERR_PTR(-EBUSY); } + return tx; +} - spin_unlock(&dev->pending_lock); - spin_unlock_irqrestore(&qp->s_lock, flags); +static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, + struct qib_qp *qp) +{ + struct qib_verbs_txreq *tx; + unsigned long flags; + + spin_lock_irqsave(&dev->pending_lock, flags); + /* assume the list non empty */ + if (likely(!list_empty(&dev->txreq_free))) { + struct list_head *l = dev->txreq_free.next; + list_del(l); + spin_unlock_irqrestore(&dev->pending_lock, flags); + tx = list_entry(l, struct qib_verbs_txreq, txreq.list); + } else { + /* call slow path to get the extra lock */ + spin_unlock_irqrestore(&dev->pending_lock, flags); + tx = __get_txreq(dev, qp); + } return tx; } @@ -948,7 +989,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); if (tx->mr) { - atomic_dec(&tx->mr->refcount); + qib_put_mr(tx->mr); tx->mr = NULL; } if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) { @@ -1112,9 +1153,9 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr, goto bail; } - tx = get_txreq(dev, qp, &ret); - if (!tx) - goto bail; + tx = get_txreq(dev, qp); + if (IS_ERR(tx)) + goto bail_tx; control = dd->f_setpbc_control(ppd, plen, qp->s_srate, be16_to_cpu(hdr->lrh[0]) >> 12); @@ -1185,6 +1226,9 @@ unaligned: ibp->n_unaligned++; bail: return ret; +bail_tx: + ret = PTR_ERR(tx); + goto bail; } /* @@ -1303,7 +1347,7 @@ done: } qib_sendbuf_done(dd, pbufn); if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } if (qp->s_wqe) { @@ -1812,6 +1856,23 @@ bail: return ret; } +struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) +{ + struct ib_ah_attr attr; + struct ib_ah *ah = ERR_PTR(-EINVAL); + struct qib_qp *qp0; + + memset(&attr, 0, sizeof attr); + attr.dlid = dlid; + attr.port_num = ppd_from_ibp(ibp)->port; + rcu_read_lock(); + qp0 = rcu_dereference(ibp->qp0); + if (qp0) + ah = ib_create_ah(qp0->ibqp.pd, &attr); + rcu_read_unlock(); + return ah; +} + /** * qib_destroy_ah - destroy an address handle * @ibah: the AH to destroy @@ -1974,6 +2035,8 @@ static void init_ibport(struct qib_pportdata *ppd) ibp->z_excessive_buffer_overrun_errors = cntrs.excessive_buffer_overrun_errors; ibp->z_vl15_dropped = cntrs.vl15_dropped; + RCU_INIT_POINTER(ibp->qp0, NULL); + RCU_INIT_POINTER(ibp->qp1, NULL); } /** @@ -1990,12 +2053,15 @@ int qib_register_ib_device(struct qib_devdata *dd) int ret; dev->qp_table_size = ib_qib_qp_table_size; - dev->qp_table = kzalloc(dev->qp_table_size * sizeof *dev->qp_table, + get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd)); + dev->qp_table = kmalloc(dev->qp_table_size * sizeof *dev->qp_table, GFP_KERNEL); if (!dev->qp_table) { ret = -ENOMEM; goto err_qpt; } + for (i = 0; i < dev->qp_table_size; i++) + RCU_INIT_POINTER(dev->qp_table[i], NULL); for (i = 0; i < dd->num_pports; i++) init_ibport(ppd + i); @@ -2022,13 +2088,15 @@ int qib_register_ib_device(struct qib_devdata *dd) spin_lock_init(&dev->lk_table.lock); dev->lk_table.max = 1 << ib_qib_lkey_table_size; lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - dev->lk_table.table = (struct qib_mregion **) + dev->lk_table.table = (struct qib_mregion __rcu **) __get_free_pages(GFP_KERNEL, get_order(lk_tab_size)); if (dev->lk_table.table == NULL) { ret = -ENOMEM; goto err_lk; } - memset(dev->lk_table.table, 0, lk_tab_size); + RCU_INIT_POINTER(dev->dma_mr, NULL); + for (i = 0; i < dev->lk_table.max; i++) + RCU_INIT_POINTER(dev->lk_table.table[i], NULL); INIT_LIST_HEAD(&dev->pending_mmaps); spin_lock_init(&dev->pending_lock); dev->mmap_offset = PAGE_SIZE; @@ -2158,7 +2226,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->dma_ops = &qib_dma_mapping_ops; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), - QIB_IDSTR " %s", init_utsname()->nodename); + "Intel Infiniband HCA %s", init_utsname()->nodename); ret = ib_register_device(ibdev, qib_create_port_files); if (ret) @@ -2168,7 +2236,8 @@ int qib_register_ib_device(struct qib_devdata *dd) if (ret) goto err_agents; - if (qib_verbs_register_sysfs(dd)) + ret = qib_verbs_register_sysfs(dd); + if (ret) goto err_class; goto bail; @@ -2251,3 +2320,17 @@ void qib_unregister_ib_device(struct qib_devdata *dd) get_order(lk_tab_size)); kfree(dev->qp_table); } + +/* + * This must be called with s_lock held. + */ +void qib_schedule_send(struct qib_qp *qp) +{ + if (qib_send_ok(qp)) { + struct qib_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + queue_work(ppd->qib_wq, &qp->s_work); + } +} diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index bd57c127322..bfc8948fdd3 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -41,6 +41,8 @@ #include <linux/interrupt.h> #include <linux/kref.h> #include <linux/workqueue.h> +#include <linux/kthread.h> +#include <linux/completion.h> #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> @@ -148,14 +150,14 @@ struct ib_reth { __be64 vaddr; __be32 rkey; __be32 length; -} __attribute__ ((packed)); +} __packed; struct ib_atomic_eth { __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ __be32 rkey; __be64 swap_data; __be64 compare_data; -} __attribute__ ((packed)); +} __packed; struct qib_other_headers { __be32 bth[3]; @@ -176,7 +178,7 @@ struct qib_other_headers { __be32 aeth; struct ib_atomic_eth atomic_eth; } u; -} __attribute__ ((packed)); +} __packed; /* * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes @@ -193,12 +195,12 @@ struct qib_ib_header { } l; struct qib_other_headers oth; } u; -} __attribute__ ((packed)); +} __packed; struct qib_pio_header { __le32 pbc[2]; struct qib_ib_header hdr; -} __attribute__ ((packed)); +} __packed; /* * There is one struct qib_mcast for each multicast GID. @@ -266,7 +268,8 @@ struct qib_cq_wc { */ struct qib_cq { struct ib_cq ibcq; - struct work_struct comptask; + struct kthread_work comptask; + struct qib_devdata *dd; spinlock_t lock; /* protect changes in this struct */ u8 notify; u8 triggered; @@ -301,6 +304,10 @@ struct qib_mregion { int access_flags; u32 max_segs; /* number of qib_segs in all the arrays */ u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + struct rcu_head list; atomic_t refcount; struct qib_segarray *map[0]; /* the segments */ }; @@ -366,9 +373,10 @@ struct qib_rwq { struct qib_rq { struct qib_rwq *wq; - spinlock_t lock; /* protect changes in this struct */ u32 size; /* size of RWQE array */ u8 max_sge; + spinlock_t lock /* protect changes in this struct */ + ____cacheline_aligned_in_smp; }; struct qib_srq { @@ -411,32 +419,75 @@ struct qib_ack_entry { */ struct qib_qp { struct ib_qp ibqp; - struct qib_qp *next; /* link list for QPN hash table */ - struct qib_qp *timer_next; /* link list for qib_ib_timer() */ - struct list_head iowait; /* link for wait PIO buf */ - struct list_head rspwait; /* link for waititing to respond */ + /* read mostly fields above and below */ struct ib_ah_attr remote_ah_attr; struct ib_ah_attr alt_ah_attr; - struct qib_ib_header s_hdr; /* next packet header to send */ - atomic_t refcount; - wait_queue_head_t wait; - wait_queue_head_t wait_dma; - struct timer_list s_timer; - struct work_struct s_work; + struct qib_qp __rcu *next; /* link list for QPN hash table */ + struct qib_swqe *s_wq; /* send work queue */ struct qib_mmap_info *ip; + struct qib_ib_header *s_hdr; /* next packet header to send */ + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + + u8 state; /* QP state */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + + struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct qib_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waititing to respond */ + + struct qib_sge_state r_sge; /* current receive data */ + struct qib_rq r_rq; /* receive work queue */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; struct qib_sge_state *s_cur_sge; + u32 s_flags; struct qib_verbs_txreq *s_tx; - struct qib_mregion *s_rdma_mr; + struct qib_swqe *s_wqe; struct qib_sge_state s_sge; /* current send request data */ - struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]; - struct qib_sge_state s_ack_rdma_sge; - struct qib_sge_state s_rdma_read_sge; - struct qib_sge_state r_sge; /* current receive data */ - spinlock_t r_lock; /* used for APM */ - spinlock_t s_lock; + struct qib_mregion *s_rdma_mr; atomic_t s_dma_busy; - unsigned processor_id; /* Processor ID QP is bound to */ - u32 s_flags; u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ @@ -447,58 +498,34 @@ struct qib_qp { u32 s_psn; /* current packet sequence number */ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ - u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ - u64 r_wr_id; /* ID for current receive WQE */ - unsigned long r_aflags; - u32 r_len; /* total length of r_sge */ - u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ - u32 r_msn; /* message sequence number */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u16 s_rdma_ack_cnt; - u8 state; /* QP state */ u8 s_state; /* opcode of last packet sent */ u8 s_ack_state; /* opcode of packet to ACK */ u8 s_nak_state; /* non-zero if NAK is pending */ - u8 r_state; /* opcode of last packet received */ u8 r_nak_state; /* non-zero if NAK is pending */ - u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 r_flags; - u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ - u8 r_head_ack_queue; /* index into s_ack_queue[] */ - u8 qp_access_flags; - u8 s_max_sge; /* size of s_wq->sg_list */ - u8 s_retry_cnt; /* number of times to retry */ - u8 s_rnr_retry_cnt; u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_pkey_index; /* PKEY index to use */ - u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ - u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ u8 s_tail_ack_queue; /* index into s_ack_queue[] */ - u8 s_srate; - u8 s_draining; - u8 s_mig_state; - u8 timeout; /* Timeout for this QP */ - u8 alt_timeout; /* Alternate path timeout for this QP */ - u8 port_num; - enum ib_mtu path_mtu; - u32 remote_qpn; - u32 qkey; /* QKEY for this QP (for UD or RD) */ - u32 s_size; /* send work queue size */ - u32 s_head; /* new entries added here */ - u32 s_tail; /* next entry to process */ - u32 s_cur; /* current work queue entry */ - u32 s_acked; /* last un-ACK'ed entry */ - u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ - u32 s_lsn; /* limit sequence number (credit) */ - struct qib_swqe *s_wq; /* send work queue */ - struct qib_swqe *s_wqe; - struct qib_rq r_rq; /* receive work queue */ - struct qib_sge r_sg_list[0]; /* verified SGEs */ + + struct qib_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + struct list_head iowait; /* link for wait PIO buf */ + + struct work_struct s_work; + + wait_queue_head_t wait_dma; + + struct qib_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; }; /* @@ -625,7 +652,7 @@ struct qib_lkey_table { u32 next; /* next unused index (speeds search) */ u32 gen; /* generation count */ u32 max; /* size of the table */ - struct qib_mregion **table; + struct qib_mregion __rcu **table; }; struct qib_opcode_stats { @@ -633,9 +660,20 @@ struct qib_opcode_stats { u64 n_bytes; /* total number of bytes */ }; +struct qib_opcode_stats_perctx { + struct qib_opcode_stats stats[128]; +}; + +struct qib_pma_counters { + u64 n_unicast_xmit; /* total unicast packets sent */ + u64 n_unicast_rcv; /* total unicast packets received */ + u64 n_multicast_xmit; /* total multicast packets sent */ + u64 n_multicast_rcv; /* total multicast packets received */ +}; + struct qib_ibport { - struct qib_qp *qp0; - struct qib_qp *qp1; + struct qib_qp __rcu *qp0; + struct qib_qp __rcu *qp1; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ struct qib_ah *sm_ah; struct qib_ah *smi_ah; @@ -649,10 +687,11 @@ struct qib_ibport { __be64 mkey; __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */ u64 tid; /* TID for traps */ - u64 n_unicast_xmit; /* total unicast packets sent */ - u64 n_unicast_rcv; /* total unicast packets received */ - u64 n_multicast_xmit; /* total multicast packets sent */ - u64 n_multicast_rcv; /* total multicast packets received */ + struct qib_pma_counters __percpu *pmastats; + u64 z_unicast_xmit; /* starting count for PMA */ + u64 z_unicast_rcv; /* starting count for PMA */ + u64 z_multicast_xmit; /* starting count for PMA */ + u64 z_multicast_rcv; /* starting count for PMA */ u64 z_symbol_error_counter; /* starting count for PMA */ u64 z_link_error_recovery_counter; /* starting count for PMA */ u64 z_link_downed_counter; /* starting count for PMA */ @@ -699,15 +738,15 @@ struct qib_ibport { u8 vl_high_limit; u8 sl_to_vl[16]; - struct qib_opcode_stats opstats[128]; }; + struct qib_ibdev { struct ib_device ibdev; struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; - struct qib_mregion *dma_mr; + struct qib_mregion __rcu *dma_mr; /* QP numbers are shared by all IB ports */ struct qib_qpn_table qpn_table; @@ -718,12 +757,13 @@ struct qib_ibdev { struct list_head memwait; /* list for wait kernel memory */ struct list_head txreq_free; struct timer_list mem_timer; - struct qib_qp **qp_table; + struct qib_qp __rcu **qp_table; struct qib_pio_header *pio_hdrs; dma_addr_t pio_hdrs_phys; /* list of QPs waiting for RNR timer */ spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */ - unsigned qp_table_size; /* size of the hash table */ + u32 qp_table_size; /* size of the hash table */ + u32 qp_rnd; /* random bytes for hash */ spinlock_t qpt_lock; u32 n_piowait; @@ -741,6 +781,10 @@ struct qib_ibdev { spinlock_t n_srqs_lock; u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ spinlock_t n_mcast_grps_lock; +#ifdef CONFIG_DEBUG_FS + /* per HCA debugfs */ + struct dentry *qib_ibdev_dbg; +#endif }; struct qib_verbs_counters { @@ -805,22 +849,10 @@ static inline int qib_send_ok(struct qib_qp *qp) !(qp->s_flags & QIB_S_ANY_WAIT_SEND)); } -extern struct workqueue_struct *qib_wq; -extern struct workqueue_struct *qib_cq_wq; - /* * This must be called with s_lock held. */ -static inline void qib_schedule_send(struct qib_qp *qp) -{ - if (qib_send_ok(qp)) { - if (qp->processor_id == smp_processor_id()) - queue_work(qib_wq, &qp->s_work); - else - queue_work_on(qp->processor_id, - qib_wq, &qp->s_work); - } -} +void qib_schedule_send(struct qib_qp *qp); static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) { @@ -893,6 +925,18 @@ void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt); void qib_free_qpn_table(struct qib_qpn_table *qpt); +#ifdef CONFIG_DEBUG_FS + +struct qib_qp_iter; + +struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev); + +int qib_qp_iter_next(struct qib_qp_iter *iter); + +void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter); + +#endif + void qib_get_credit(struct qib_qp *qp, u32 aeth); unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult); @@ -917,6 +961,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); + void qib_rc_rnr_retry(unsigned long arg); void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr); @@ -928,9 +974,9 @@ int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr); void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, int has_grh, void *data, u32 tlen, struct qib_qp *qp); -int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr); +int qib_alloc_lkey(struct qib_mregion *mr, int dma_region); -int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr); +void qib_free_lkey(struct qib_mregion *mr); int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, struct qib_sge *isge, struct ib_sge *sge, int acc); @@ -953,6 +999,10 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int qib_destroy_srq(struct ib_srq *ibsrq); +int qib_cq_init(struct qib_devdata *dd); + +void qib_cq_exit(struct qib_devdata *dd); + void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); @@ -998,6 +1048,29 @@ int qib_unmap_fmr(struct list_head *fmr_list); int qib_dealloc_fmr(struct ib_fmr *ibfmr); +static inline void qib_get_mr(struct qib_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + +void mr_rcu_callback(struct rcu_head *list); + +static inline void qib_put_mr(struct qib_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + call_rcu(&mr->list, mr_rcu_callback); +} + +static inline void qib_put_ss(struct qib_sge_state *ss) +{ + while (ss->num_sge) { + qib_put_mr(ss->sge.mr); + if (--ss->num_sge) + ss->sge = *ss->sg_list++; + } +} + + void qib_release_mmap_info(struct kref *ref); struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size, diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c index 561b8bca406..1d7281c5a02 100644 --- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c +++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -102,10 +103,10 @@ int qib_enable_wc(struct qib_devdata *dd) u64 atmp; atmp = pioaddr & ~(piolen - 1); if (atmp < addr || (atmp + piolen) > (addr + len)) { - qib_dev_err(dd, "No way to align address/size " - "(%llx/%llx), no WC mtrr\n", - (unsigned long long) atmp, - (unsigned long long) piolen << 1); + qib_dev_err(dd, + "No way to align address/size (%llx/%llx), no WC mtrr\n", + (unsigned long long) atmp, + (unsigned long long) piolen << 1); ret = -ENODEV; } else { pioaddr = atmp; @@ -120,8 +121,7 @@ int qib_enable_wc(struct qib_devdata *dd) if (cookie < 0) { { qib_devinfo(dd->pcidev, - "mtrr_add() WC for PIO bufs " - "failed (%d)\n", + "mtrr_add() WC for PIO bufs failed (%d)\n", cookie); ret = -EINVAL; } |
